1 Information

Title: Extensive diversification of amphibian skin micro- and mycobiomes upon rewilding, with limited inter-domain interactions or effects of dietary êžµ-carotene Authors: Alice Risely, Phillip G. Byrne, David A, Hunter, Ana S. Carranco, Bethany J. Hoye, and Aimee J. Silla

2 Abstract

The composition and dynamics of the skin bacterial and fungal microbiome is thought to influence host-pathogen defence. This microbial community is shaped by host captivity, diet, and microbial interactions between bacterial and fungal components. However, there remains little understanding of how specific micronutrients influence bacterial and fungal microbiome composition and their inter-domain interactions during rewilding of captive-bred animals. This study experimentally investigated the effect of dietary beta-carotene supplementation and subsequent field release on bacterial and fungal microbiome composition and dynamics using the Southern Corroboree frog (Pseudophryne corroboree) as a model system. We found large-scale diversification of bacterial communities post-release and similar diversification of fungal communities. The rewilded fungal mycobiome was more transient and demonstrated stronger temporal and micro-spatial fluctuations than the bacterial microbiome. Accounting for temporal and spatial factors, we found strong residual associations between bacterial members, yet limited evidence for inter-domain associations, suggesting that co-occurrence patterns between bacterial and fungal communities are largely a result of shared responses to the environment rather than direct interactions. Lastly, we found supplementation of dietary beta-carotene in captivity had no impact on post-release microbiome diversity, yet was associated with approximately 15% of common bacterial and fungal genera. Our research demonstrates that environmental factors play a dominant role over dietary beta-carotene supplementation in shaping microbiome diversity post-release, and suggest inter-domain interactions may also only exert a minor influence. Further research on the function and ecology of skin bacterial and fungal microbiomes will be crucial for developing strategies to support survival of endangered amphibian species.

Analyasis

2.1 Load packages

library(phyloseq)
library(ggplot2)
library(vegan)
library(dplyr)
library(scales)
library(grid)
library(reshape2)
library(ape)
library(gridExtra)
library(ade4)
library(plyr)
library(tidyr)
library(data.table)
library(stringr)
library(ggrepel)
library(decontam)
library(ranacapa)
library(patchwork)
library(ggvenn)
library(gllvm)
library(ggvenn)
library(ggpubr)
library(viridis)
library(NetCoMi)
library(ggcorrplot)
library(igraph)
library(ggnetwork)
library(microbiomeutilities)
library(pheatmap)
library(RColorBrewer)
library(glmmTMB)
library(sjPlot)
library(wesanderson)

3 Data import and cleaning

Note this data has gone through some rudimentary processing and filtering.

setwd("C:/Users/risel/Dropbox/Academic projects/Frog microbiome UOW/Frogs_UOW/Longitudinal project/Analysis/Published")

frog_16S <- readRDS("corroboree_16S.RDS")
frog_ITS <- readRDS("corroboree_ITS.RDS")

frog_16S 
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 10024 taxa and 166 samples ]
## sample_data() Sample Data:       [ 166 samples by 14 sample variables ]
## tax_table()   Taxonomy Table:    [ 10024 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 10024 tips and 10023 internal nodes ]
frog_ITS
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1687 taxa and 167 samples ]
## sample_data() Sample Data:       [ 167 samples by 14 sample variables ]
## tax_table()   Taxonomy Table:    [ 1687 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1687 tips and 1686 internal nodes ]

3.1 Study design

ggplot(sample_data(frog_16S), aes(y = frog_id, x = TimePoint))+
  geom_line(aes(group = frog_id), alpha = 0.5)+
  geom_point(pch = 21, size = 2, width = 0.05, fill = "lightblue")+
  theme_bw()

3.2 Filter samples with low read depth

frog_16S<-subset_samples(frog_16S, Seq_depth> 2000)
frog_ITS<-subset_samples(frog_ITS, Seq_depth> 1000)

3.3 Change ASV names

##### change names

## change ASV names 

taxtable<-data.frame(tax_table(frog_16S))
taxtable$Taxa<-row.names(taxtable)
head(taxtable)
taxtable$ASV<-1:nrow(taxtable)
taxtable$New_Taxa<-paste("B", "ASV", taxtable$ASV)

taxa_names(frog_16S)<-taxtable$New_Taxa

############################# fungi ###########

taxtable<-data.frame(tax_table(frog_ITS))
taxtable$Taxa<-row.names(taxtable)
head(taxtable)
taxtable$ASV<-1:nrow(taxtable)
taxtable$New_Taxa<-paste("F", "ASV", taxtable$ASV)

taxa_names(frog_ITS)<-taxtable$New_Taxa

3.4 Rarefy

### rarefaction curves ##########
### rarefaction curves ##########

rarefaction_V4<- ggrare(frog_16S, step = 500, se = TRUE)+xlim(c(0,5000))+ggtitle("Bacteria")+geom_vline(xintercept = 2000, linetype = "dashed")
## rarefying sample 1-T0
## rarefying sample 10-T0
## rarefying sample 100-T1
## rarefying sample 101-T2
## rarefying sample 102-T2
## rarefying sample 104-T3
## rarefying sample 108-T1
## rarefying sample 109-T2
## rarefying sample 11-T1
## rarefying sample 112-T3
## rarefying sample 113-T0
## rarefying sample 114-T0
## rarefying sample 115-T1
## rarefying sample 116-T1
## rarefying sample 118-T2
## rarefying sample 119-T3
## rarefying sample 12-T1
## rarefying sample 120-T3
## rarefying sample 121-T0
## rarefying sample 122-T0
## rarefying sample 123-T1
## rarefying sample 126-T2
## rarefying sample 128-T3
## rarefying sample 129-T0
## rarefying sample 13-T2
## rarefying sample 131-T1
## rarefying sample 132-T1
## rarefying sample 133-T2
## rarefying sample 134-T2
## rarefying sample 136-T3
## rarefying sample 15-T3
## rarefying sample 19-T1
## rarefying sample 2-T0
## rarefying sample 21-T2
## rarefying sample 24-T3
## rarefying sample 25-T0
## rarefying sample 26-T0
## rarefying sample 27-T1
## rarefying sample 28-T1
## rarefying sample 29-T2
## rarefying sample 30-T2
## rarefying sample 31-T3
## rarefying sample 32-T3
## rarefying sample 33-T0
## rarefying sample 34-T0
## rarefying sample 35-T1
## rarefying sample 39-T3
## rarefying sample 4-T1
## rarefying sample 40-T3
## rarefying sample 41-T0
## rarefying sample 43-T1
## rarefying sample 45-T2
## rarefying sample 46-T2
## rarefying sample 47-T3
## rarefying sample 53-T2
## rarefying sample 55-T3
## rarefying sample 56-T3
## rarefying sample 58-T0
## rarefying sample 6-T2
## rarefying sample 60-T1
## rarefying sample 62-T2
## rarefying sample 63-T3
## rarefying sample 65-T0
## rarefying sample 66-T0
## rarefying sample 67-T1
## rarefying sample 68-T1
## rarefying sample 69-T2
## rarefying sample 71-T3
## rarefying sample 73-T0
## rarefying sample 77-T2
## rarefying sample 79-T3
## rarefying sample 8-T3
## rarefying sample 89-T0
## rarefying sample 91-T1
## rarefying sample 92-T1
## rarefying sample 95-T3
## rarefying sample 96-T3
## rarefying sample 98-T0
## rarefying sample R1
## rarefying sample R10
## rarefying sample R12
## rarefying sample R13
## rarefying sample R14
## rarefying sample R15
## rarefying sample R16
## rarefying sample R17
## rarefying sample R18
## rarefying sample R2
## rarefying sample R21
## rarefying sample R22
## rarefying sample R24
## rarefying sample R25
## rarefying sample R26
## rarefying sample R27
## rarefying sample R28
## rarefying sample R29
## rarefying sample R3
## rarefying sample R31
## rarefying sample R32
## rarefying sample R33
## rarefying sample R34
## rarefying sample R35
## rarefying sample R36
## rarefying sample R37
## rarefying sample R4
## rarefying sample R5
## rarefying sample R8
## rarefying sample R9
## rarefying sample RR1
## rarefying sample RR10
## rarefying sample RR11
## rarefying sample RR12
## rarefying sample RR13
## rarefying sample RR14
## rarefying sample RR15
## rarefying sample RR16
## rarefying sample RR17
## rarefying sample RR2
## rarefying sample RR3
## rarefying sample RR4
## rarefying sample RR5
## rarefying sample RR6
## rarefying sample RR8
## rarefying sample RR9

rarefaction_ITS<- ggrare(frog_ITS, step = 500, se = TRUE)+xlim(c(0,5000))+ggtitle("Fungi")+geom_vline(xintercept = 1000, linetype = "dashed")
## rarefying sample 10-T0
## rarefying sample 101-T2
## rarefying sample 104-T3
## rarefying sample 105-T0
## rarefying sample 106-T0
## rarefying sample 107-T1
## rarefying sample 109-T2
## rarefying sample 11-T1
## rarefying sample 113-T0
## rarefying sample 115-T1
## rarefying sample 116-T1
## rarefying sample 117-T2
## rarefying sample 118-T2
## rarefying sample 119-T3
## rarefying sample 12-T1
## rarefying sample 120-T3
## rarefying sample 121-T0
## rarefying sample 122-T0
## rarefying sample 123-T1
## rarefying sample 126-T2
## rarefying sample 128-T3
## rarefying sample 129-T0
## rarefying sample 13-T2
## rarefying sample 130-T0
## rarefying sample 131-T1
## rarefying sample 132-T1
## rarefying sample 133-T2
## rarefying sample 134-T2
## rarefying sample 136-T3
## rarefying sample 14-T2
## rarefying sample 15-T3
## rarefying sample 16-T3
## rarefying sample 17-T0
## rarefying sample 19-T1
## rarefying sample 20-T1
## rarefying sample 21-T2
## rarefying sample 24-T3
## rarefying sample 25-T0
## rarefying sample 26-T0
## rarefying sample 27-T1
## rarefying sample 28-T1
## rarefying sample 29-T2
## rarefying sample 30-T2
## rarefying sample 31-T3
## rarefying sample 32-T3
## rarefying sample 33-T0
## rarefying sample 34-T0
## rarefying sample 35-T1
## rarefying sample 36-T1
## rarefying sample 38-T2
## rarefying sample 40-T3
## rarefying sample 41-T0
## rarefying sample 43-T1
## rarefying sample 47-T3
## rarefying sample 49-T0
## rarefying sample 50-T0
## rarefying sample 51-T1
## rarefying sample 52-T1
## rarefying sample 53-T2
## rarefying sample 54-T2
## rarefying sample 55-T3
## rarefying sample 56-T3
## rarefying sample 57-T0
## rarefying sample 58-T0
## rarefying sample 6-T2
## rarefying sample 60-T1
## rarefying sample 62-T2
## rarefying sample 63-T3
## rarefying sample 64-T3
## rarefying sample 65-T0
## rarefying sample 66-T0
## rarefying sample 67-T1
## rarefying sample 68-T1
## rarefying sample 69-T2
## rarefying sample 71-T3
## rarefying sample 73-T0
## rarefying sample 76-T1
## rarefying sample 77-T2
## rarefying sample 81-T0
## rarefying sample 91-T1
## rarefying sample 92-T1
## rarefying sample 93-T2
## rarefying sample 94-T2
## rarefying sample 95-T3
## rarefying sample 96-T3
## rarefying sample 97-T0
## rarefying sample 98-T0
## rarefying sample R1
## rarefying sample R10
## rarefying sample R11
## rarefying sample R13
## rarefying sample R14
## rarefying sample R15
## rarefying sample R16
## rarefying sample R17
## rarefying sample R18
## rarefying sample R19
## rarefying sample R2
## rarefying sample R21
## rarefying sample R22
## rarefying sample R24
## rarefying sample R25
## rarefying sample R26
## rarefying sample R27
## rarefying sample R28
## rarefying sample R29
## rarefying sample R3
## rarefying sample R31
## rarefying sample R32
## rarefying sample R33
## rarefying sample R34
## rarefying sample R35
## rarefying sample R36
## rarefying sample R37
## rarefying sample R4
## rarefying sample R5
## rarefying sample R6
## rarefying sample R7
## rarefying sample R8
## rarefying sample R9
## rarefying sample RR1
## rarefying sample RR10
## rarefying sample RR11
## rarefying sample RR12
## rarefying sample RR13
## rarefying sample RR14
## rarefying sample RR15
## rarefying sample RR16
## rarefying sample RR17
## rarefying sample RR2
## rarefying sample RR3
## rarefying sample RR4
## rarefying sample RR5
## rarefying sample RR6
## rarefying sample RR8
## rarefying sample RR9

####### rarefy ####
####### rarefy ####
####### rarefy ####
####### rarefy ####

V4_rare <- rarefy_even_depth(frog_16S, sample.size = 2000)
ITS_rare <- rarefy_even_depth(frog_ITS, sample.size = 1000)
rarefaction_V4+rarefaction_ITS

4 Analysis: Effect of soft release

4.0.1 VIZ: Barplots

Phylum level barplots

###### phylum level ####
###### phylum level ####


V4_Phylum<- microbiome::aggregate_top_taxa(V4_rare, "Phylum", top = 11)

V4_melt<-psmelt(V4_Phylum)

V4_melt<-subset(V4_melt, OTU == "Proteobacteria")
V4_melt<- V4_melt %>% arrange(-Abundance)
dim(V4_melt)
## [1] 124  19
sample_order<-V4_melt$Sample


V4_barplot <- speedyseq::plot_bar(V4_Phylum, fill = "Phylum")+
  geom_bar(  stat = "identity",width = 0.95)+
  scale_fill_brewer( palette = "Paired", direction = -1)+
  facet_grid(~TimePoint+location, scales = "free", space='free')+
  ggtitle("a) Bacteria")+
  theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
  theme(legend.key.size = unit(0.5, "cm"))+
  labs(fill = "Bacterial Phylum")


V4_barplot$data$Sample <- factor(V4_barplot$data$Sample, levels = sample_order)


###################################################
###################################################
###################################################


ITS_Phylum<- microbiome::aggregate_top_taxa(ITS_rare, "Phylum", top = 11)

ITS_melt<-psmelt(ITS_Phylum)
ITS_melt<-subset(ITS_melt, OTU == "Basidiomycota")
ITS_melt<- ITS_melt %>% arrange(-Abundance)
dim(ITS_melt)
## [1] 136  18
sample_order_ITS<-ITS_melt$Sample


ITS_barplot<-speedyseq::plot_bar(ITS_Phylum, fill = "Phylum")+
  geom_bar(  stat = "identity",width = 0.95)+
  scale_fill_brewer( palette = "Paired", direction = 1)+
  facet_grid(~TimePoint+location, scales = "free", space='free')+
  ggtitle("b) Fungi")+
  theme(axis.text.x = element_blank())+
  theme(legend.key.size = unit(0.5, "cm"))+
  labs(fill = "Fungal Phylum")

ITS_barplot$data$Sample <- factor(ITS_barplot$data$Sample, levels = sample_order_ITS)


V4_barplot /ITS_barplot

Genus level barplots.

## genus level ###

# colour palette

names(wes_palettes)
##  [1] "BottleRocket1"  "BottleRocket2"  "Rushmore1"      "Rushmore"      
##  [5] "Royal1"         "Royal2"         "Zissou1"        "Darjeeling1"   
##  [9] "Darjeeling2"    "Chevalier1"     "FantasticFox1"  "Moonrise1"     
## [13] "Moonrise2"      "Moonrise3"      "Cavalcanti1"    "GrandBudapest1"
## [17] "GrandBudapest2" "IsleofDogs1"    "IsleofDogs2"
pala<- wes_palette("Rushmore1", 5, type = c("discrete"))

palb<- wes_palette("GrandBudapest2", 4, type = c("discrete"))

palneon<- c("#af3dff",  "#55ffe1",  "#ff3b94" , "#a6fd29" , "#37013a" ) 

palx<-brewer.pal(12,"Paired")
pali<-brewer.pal(12,"Pastel1")

palz<-c(palx, pala, palb, palneon)

palz[14]<- "bisque1"
palz[17]<- "tomato1"


V4_genus<- microbiome::aggregate_top_taxa(V4_rare, "Genus", top = 24)


V4_barplot <- speedyseq::plot_bar(V4_genus, fill = "Genus")+
  geom_bar(  stat = "identity",width = 0.95)+
  scale_fill_manual( values = palz)+
  facet_grid(~TimePoint+location, scales = "free", space='free')+
  ggtitle("a) Bacteria")+
  theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
  theme(legend.key.size = unit(0.4, "cm"))+
  labs(fill = "Bacterial genus")


# order samples

V4_barplot$data$Sample <- factor(V4_barplot$data$Sample, levels = sample_order)

###################################################
###################################################
###################################################


ITS_genus<- microbiome::aggregate_top_taxa(ITS_rare, "Genus", top = 24)


ITS_barplot<-speedyseq::plot_bar(ITS_genus, fill = "Genus")+
  geom_bar(  stat = "identity",width = 0.95)+
  scale_fill_manual( values = palz)+
  facet_grid(~TimePoint+location, scales = "free", space='free')+
  ggtitle("b) Fungi")+
  theme(axis.text.x = element_blank())+
  theme(legend.key.size = unit(0.4, "cm"))+
  labs(fill = "Fungal genus")

# order samples

ITS_barplot$data$Sample <- factor(ITS_barplot$data$Sample, levels = sample_order_ITS)

V4_barplot /ITS_barplot

4.0.2 VIZ: Venn diagram

## lab vs wild 

lab_ps<-subset_samples(frog_16S, TimePoint == "T1 (lab)")
lab_ps<-prune_taxa(taxa_sums(lab_ps)>0, lab_ps)

T2_ps<-subset_samples(frog_16S, TimePoint == "T2")
T2_ps<-prune_taxa(taxa_sums(T2_ps)>0, T2_ps)

T3_ps<-subset_samples(frog_16S, TimePoint == "T3")
T3_ps<-prune_taxa(taxa_sums(T3_ps)>0, T3_ps)


lab_ASVs<-taxa_names(lab_ps)
T2_ASVs<-taxa_names(T2_ps)
T3_ASVs<-taxa_names(T3_ps)


x <- list(
  lab = lab_ASVs,
  T2 = T2_ASVs,
  T3 = T3_ASVs
)


ggvenn(
  x, 
  fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
  stroke_size = 0.5, set_name_size = 4)

#### ITS### 


lab_ps<-subset_samples(frog_ITS, TimePoint == "T1 (lab)")
lab_ps<-prune_taxa(taxa_sums(lab_ps)>0, lab_ps)

T2_ps<-subset_samples(frog_ITS, TimePoint == "T2")
T2_ps<-prune_taxa(taxa_sums(T2_ps)>0, T2_ps)

T3_ps<-subset_samples(frog_ITS, TimePoint == "T3")
T3_ps<-prune_taxa(taxa_sums(T3_ps)>0, T3_ps)

lab_ASVs<-taxa_names(lab_ps)
T2_ASVs<-taxa_names(T2_ps)
T3_ASVs<-taxa_names(T3_ps)


x <- list(
  lab = lab_ASVs,
  T2 = T2_ASVs,
  T3 = T3_ASVs
)


ggvenn(
  x, 
  fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
  stroke_size = 0.5, set_name_size = 4)

4.0.3 Prevalence stats - genus level

Make sure we get representative genera from across time points given that samples from lab frogs are more common. This may cause a bias in genus selection.

# prevalence function

prevalence <- function(physeq, add_tax = TRUE){
  
  ## Check if taxa are rows
  trows <- taxa_are_rows(physeq)
  
  ## Extract OTU table
  otutab <- as.data.frame(otu_table(physeq))
  
  ## Transpose OTU table (species should be arranged by rows)
  if(trows == FALSE){
    otutab <- t(otutab)
  }
  
  ## Estimate prevalence (number of samples with OTU present)
  prevdf <- apply(X = otutab,
                  #MARGIN = ifelse(trows, yes = 1, no = 2),  # for a non-transposed data
                  MARGIN = 1,
                  FUN = function(x){sum(x > 0)})
  
  ## Add total and average read counts per OTU
  prevdf <- data.frame(Prevalence = prevdf,
                       TotalAbundance = taxa_sums(physeq),
                       MeanAbundance = rowMeans(otutab),
                       MedianAbundance = apply(otutab, 1, median))
  
  ## Add taxonomy table
  if(add_tax == TRUE && !is.null(tax_table(physeq, errorIfNULL = F))){
    prevdf <- cbind(prevdf, tax_table(physeq))
  }
  return(prevdf)
}

4.0.3.1 Select common bacterial genera to retain

V4_genus<-tax_glom(V4_rare, "Genus")

V4_genus_T1 <-subset_samples(V4_genus, TimePoint == "T1 (lab)")
V4_genus_T2 <-subset_samples(V4_genus, TimePoint == "T2")
V4_genus_T3 <-subset_samples(V4_genus, TimePoint == "T3")

V4_prev_T1<-prevalence(V4_genus_T1)
V4_prev_T2<-prevalence(V4_genus_T2)
V4_prev_T3<-prevalence(V4_genus_T3)

V4_prev_T1<-V4_prev_T1%>%arrange(-Prevalence)
V4_prev_T2<-V4_prev_T2%>%arrange(-Prevalence)
V4_prev_T3<-V4_prev_T3%>%arrange(-Prevalence)


# top genera per group

V4_top_genera_T1<- V4_prev_T1[1:12,]
V4_top_genera_T1$Genus
##  [1] "Aeromonas"                                 
##  [2] "Chryseobacterium"                          
##  [3] "Burkholderia-Caballeronia-Paraburkholderia"
##  [4] "Pseudomonas"                               
##  [5] "Acinetobacter"                             
##  [6] "Mucilaginibacter"                          
##  [7] "Microbacterium"                            
##  [8] "Rhodanobacter"                             
##  [9] "Comamonas"                                 
## [10] "Alkanindiges"                              
## [11] "Pedobacter"                                
## [12] "Myroides"
V4_top_genera_T2<- V4_prev_T2[1:12,]
V4_top_genera_T2$Genus
##  [1] "Massilia"                                  
##  [2] "uncultured bacterium"                      
##  [3] "Methylobacterium"                          
##  [4] "Acidothermus"                              
##  [5] "Blastococcus"                              
##  [6] "Pseudomonas"                               
##  [7] "Mycobacterium"                             
##  [8] "Conexibacter"                              
##  [9] "Burkholderia-Caballeronia-Paraburkholderia"
## [10] "Noviherbaspirillum"                        
## [11] "uncultured"                                
## [12] "Candidatus Xiphinematobacter"
V4_top_genera_T3<- V4_prev_T3[1:12,]
V4_top_genera_T3$Genus
##  [1] "Conexibacter"                              
##  [2] "Burkholderia-Caballeronia-Paraburkholderia"
##  [3] "Methylobacterium"                          
##  [4] "uncultured"                                
##  [5] "Acidothermus"                              
##  [6] "uncultured bacterium"                      
##  [7] "Gemmatimonas"                              
##  [8] "HSB OF53-F07"                              
##  [9] "Blastococcus"                              
## [10] "Mycobacterium"                             
## [11] "Sphingomonas"                              
## [12] "Nakamurella"
# select genera to keep


V4_genera_to_keep<-unique(c(V4_top_genera_T1$Genus, V4_top_genera_T2$Genus, V4_top_genera_T3$Genus))
V4_genera_to_keep<- V4_genera_to_keep[V4_genera_to_keep != "uncultured"]
V4_genera_to_keep<- V4_genera_to_keep[V4_genera_to_keep != "uncultured bacterium"]
V4_genera_to_keep
##  [1] "Aeromonas"                                 
##  [2] "Chryseobacterium"                          
##  [3] "Burkholderia-Caballeronia-Paraburkholderia"
##  [4] "Pseudomonas"                               
##  [5] "Acinetobacter"                             
##  [6] "Mucilaginibacter"                          
##  [7] "Microbacterium"                            
##  [8] "Rhodanobacter"                             
##  [9] "Comamonas"                                 
## [10] "Alkanindiges"                              
## [11] "Pedobacter"                                
## [12] "Myroides"                                  
## [13] "Massilia"                                  
## [14] "Methylobacterium"                          
## [15] "Acidothermus"                              
## [16] "Blastococcus"                              
## [17] "Mycobacterium"                             
## [18] "Conexibacter"                              
## [19] "Noviherbaspirillum"                        
## [20] "Candidatus Xiphinematobacter"              
## [21] "Gemmatimonas"                              
## [22] "HSB OF53-F07"                              
## [23] "Sphingomonas"                              
## [24] "Nakamurella"

4.0.3.2 Select common fungal genera to retain

############## ITS #####

ITS_genus<-tax_glom(ITS_rare, "Genus")

ITS_genus_T1 <-subset_samples(ITS_genus, TimePoint == "T1 (lab)")
ITS_genus_T2 <-subset_samples(ITS_genus, TimePoint == "T2")
ITS_genus_T3 <-subset_samples(ITS_genus, TimePoint == "T3")


ITS_prev_T1<-prevalence(ITS_genus_T1)
ITS_prev_T2<-prevalence(ITS_genus_T2)
ITS_prev_T3<-prevalence(ITS_genus_T3)

ITS_prev_T1<-ITS_prev_T1%>%arrange(-Prevalence)
ITS_prev_T2<-ITS_prev_T2%>%arrange(-Prevalence)
ITS_prev_T3<-ITS_prev_T3%>%arrange(-Prevalence)


# top genera per group

ITS_top_genera_T1<- ITS_prev_T1[1:12,]
ITS_top_genera_T1$Genus
##  [1] "Apiotrichum"                      "Cutaneotrichosporon"             
##  [3] "Kingdom_Fungi"                    "Acrodontium"                     
##  [5] "Moesziomyces"                     "Rhodotorula"                     
##  [7] "Basidiobolus"                     "Cladosporium"                    
##  [9] "Cystobasidium"                    "Cyphellophora"                   
## [11] "Family_Mortierellaceae"           "Rozellomycota_gen_Incertae_sedis"
ITS_top_genera_T2<- ITS_prev_T2[1:12,]
ITS_top_genera_T2$Genus
##  [1] "Alternaria"     "Saitoella"      "Cryptococcus"   "Naganishia"    
##  [5] "Cladosporium"   "Rhodotorula"    "Solicoccozyma"  "Filobasidium"  
##  [9] "Penicillium"    "Kingdom_Fungi"  "Coniochaeta"    "Sporobolomyces"
ITS_top_genera_T3<- ITS_prev_T3[1:12,]
ITS_top_genera_T3$Genus
##  [1] "Cladosporium"     "Alternaria"       "Penicillium"      "Kingdom_Fungi"   
##  [5] "Coniochaeta"      "Vishniacozyma"    "Botrytis"         "Naganishia"      
##  [9] "Sarocladium"      "Cryptococcus"     "Order_Helotiales" "Phaeococcomyces"
ITS_genera_to_keep<-unique(c(ITS_top_genera_T1$Genus, ITS_top_genera_T2$Genus, ITS_top_genera_T3$Genus))
ITS_genera_to_keep<- ITS_genera_to_keep[ITS_genera_to_keep != "uncultured"]
ITS_genera_to_keep<- ITS_genera_to_keep[ITS_genera_to_keep != "Kingdom_Fungi"]
ITS_genera_to_keep
##  [1] "Apiotrichum"                      "Cutaneotrichosporon"             
##  [3] "Acrodontium"                      "Moesziomyces"                    
##  [5] "Rhodotorula"                      "Basidiobolus"                    
##  [7] "Cladosporium"                     "Cystobasidium"                   
##  [9] "Cyphellophora"                    "Family_Mortierellaceae"          
## [11] "Rozellomycota_gen_Incertae_sedis" "Alternaria"                      
## [13] "Saitoella"                        "Cryptococcus"                    
## [15] "Naganishia"                       "Solicoccozyma"                   
## [17] "Filobasidium"                     "Penicillium"                     
## [19] "Coniochaeta"                      "Sporobolomyces"                  
## [21] "Vishniacozyma"                    "Botrytis"                        
## [23] "Sarocladium"                      "Order_Helotiales"                
## [25] "Phaeococcomyces"

4.0.4 VIZ: Heat map

# create a gradient color palette for abundance
grad_ab <- colorRampPalette(c("#faf3dd","#f7d486" ,"#5e6472"))
grad_ab <- colorRampPalette(c("mistyrose", "lightblue", "cadetblue", "navyblue"))
grad_ab_pal <- grad_ab(10)


V4_top<-subset_taxa(V4_rare, Genus %in% V4_genera_to_keep)
V4_top<-tax_glom(V4_top, "Genus")


ITS_top<-subset_taxa(ITS_rare, Genus %in% ITS_genera_to_keep)
ITS_top<-tax_glom(ITS_top, "Genus")

# Define colors for each sample type
# Specify colors

pal<-pals::stepped3()[c(1,5,9,13)]
pal[2]<-"violetred"
#pal[3]<-"aquamarine4"
pal
## [1] "#3182BD"   "violetred" "#31A354"   "#756BB1"
ann_colors = list(
  TimePoint = c(`T1 (lab)` = "#3182BD", T2 = "violetred", T3 = "#31A354"))


###############
###############
###############
###############

taxa_names(V4_top)<-paste(abbreviate(V4_top@tax_table@.Data[,2],6), 1:length(taxa_names(V4_top)), sep = "")


V4_top@tax_table@.Data[,6]<-abbreviate(V4_top@tax_table@.Data[,6],15)


heatmap1<-plot_taxa_heatmap(V4_top,
                            subset.top = 25,
                            VariableA = c("TimePoint"),
                            heatcolors = grad_ab_pal,
                            annotation_colors= ann_colors,
                            transformation = "log10",
                            cluster_rows = T,
                            cluster_cols = T,
                            clustering_method = "ward.D2",
                            show_colnames = F,
                            fontsize = 8,
                            cellwidth = 1.5, 
                            cellheight = 8)

## fungi

taxa_names(ITS_top)<-paste(abbreviate(ITS_top@tax_table@.Data[,2],6), 1:length(taxa_names(ITS_top)), sep = "")


ITS_top@tax_table@.Data[,6]<-abbreviate(ITS_top@tax_table@.Data[,6],15)


heatmap2<-plot_taxa_heatmap(ITS_top,
                            subset.top = 25,
                            VariableA = c("TimePoint"),
                            heatcolors = grad_ab_pal, 
                            annotation_colors= ann_colors,
                            transformation = "log10",
                            cluster_rows = T,
                            cluster_cols = T,
                            clustering_method = "ward.D2",
                            show_colnames = F,
                            fontsize = 8,
                            cellwidth = 1.5, 
                            cellheight = 8)

ggpubr::ggarrange(heatmap1$plot[[4]], heatmap2$plot[[4]],  ncol = 1, align = "v")

4.1 STATS: GLLVM model

4.1.0.1 Data prep: Agglomerate, transform, filter and merge ITS and 16S data

Agglomerate to genus, CLR transform, filter and merge ITS and 16S data. Data is CLR transformed before filtering to ensure CLR values represent compositional values prior to filtering.

V4_samples<-sample_names(frog_16S)
ITS_samples<-sample_names(frog_ITS)
samples_to_keep<-intersect(V4_samples,ITS_samples)


frog_16S_sub<-prune_samples(samples_to_keep, frog_16S)
frog_ITS_sub<-prune_samples(samples_to_keep, frog_ITS)


# glom by genus and get list of most common taxa per dataset

V4_genus<- tax_glom(frog_16S_sub, taxrank = "Genus")
ITS_genus<- tax_glom(frog_ITS_sub, taxrank = "Genus")


#########
#########

V4_top<-microbiome::aggregate_top_taxa(V4_genus, top = 26, level = "Genus")
ITS_top<-microbiome::aggregate_top_taxa(ITS_genus, top = 26, level = "Genus")

remove<-c("Other", "uncultured", "Kingdom_Fungi")

V4_to_keep<-taxa_names(V4_top)[!taxa_names(V4_top)%in% remove]
ITS_to_keep<-taxa_names(ITS_top)[!taxa_names(ITS_top)%in% remove]

# Proportion of reads left after filtering

sum(sample_sums(subset_taxa(V4_genus, Genus %in% V4_to_keep)))/sum(sample_sums(V4_genus))
## [1] 0.6391273
sum(sample_sums(subset_taxa(ITS_genus, Genus %in% ITS_to_keep)))/sum(sample_sums(ITS_genus))
## [1] 0.8695636
# CLR transformation


V4_clr <-microbiome::transform(V4_genus, transform = "clr")
ITS_clr <-microbiome::transform(ITS_genus, transform = "clr")

## Keep only top 25 genera

V4_clr<-subset_taxa(V4_clr, Genus %in% V4_to_keep)
ITS_clr<-subset_taxa(ITS_clr, Genus %in% ITS_to_keep)

## change names

taxa_names(V4_clr)<- data.frame(tax_table(V4_clr))$Genus
taxa_names(ITS_clr)<- data.frame(tax_table(ITS_clr))$Genus


### make merged otu table 

V4_otu<- data.frame(otu_table(V4_clr))
ITS_otu<- data.frame(otu_table(ITS_clr))

V4_otu[1:5,1:5]
ITS_otu[1:5,1:5]
names(V4_otu)<-sample_names(V4_clr)
names(ITS_otu)<-sample_names(ITS_clr)

full_otu<-rbind(V4_otu, ITS_otu)
full_otu<-otu_table(full_otu, taxa_are_rows = T)

# make merged tax table

V4_tax<- data.frame(tax_table(V4_clr))
ITS_tax<- data.frame(tax_table(ITS_clr))

full_tax<-rbind(V4_tax, ITS_tax)
full_tax2<-tax_table(full_tax)
taxa_names(full_tax2)<- row.names(full_tax)
colnames(full_tax2) <- c("Kingdom", "Phylum", "Class", 
                         "Order", "Family", "Genus", "Species")


### make merged metadata 

metadataV4<-data.frame(sample_data(V4_clr))
metadata<-sample_data(metadataV4)

## merge

merged_ps<-phyloseq(metadata, full_otu, full_tax2)
head(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                  Kingdom  Phylum         Class    Order   Family  Genus  Species
##                  <chr>    <chr>          <chr>    <chr>   <chr>   <chr>  <chr>  
## 1 Leucobacter    Bacteria Actinobacteria Actinob~ Microc~ Microb~ Leuco~ <NA>   
## 2 Microbacterium Bacteria Actinobacteria Actinob~ Microc~ Microb~ Micro~ <NA>   
## 3 Pseudomonas    Bacteria Proteobacteria Gammapr~ Pseudo~ Pseudo~ Pseud~ <NA>   
## 4 Acinetobacter  Bacteria Proteobacteria Gammapr~ Pseudo~ Moraxe~ Acine~ <NA>   
## 5 Alkanindiges   Bacteria Proteobacteria Gammapr~ Pseudo~ Moraxe~ Alkan~ <NA>   
## 6 Aeromonas      Bacteria Proteobacteria Gammapr~ Aeromo~ Aeromo~ Aerom~ <NA>
tail(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                       Kingdom Phylum        Class  Order  Family  Genus  Species
##                       <chr>   <chr>         <chr>  <chr>  <chr>   <chr>  <chr>  
## 1 Cutaneotrichosporon Fungi   Basidiomycota Treme~ Trich~ Tricho~ Cutan~ <NA>   
## 2 Apiotrichum         Fungi   Basidiomycota Treme~ Trich~ Tricho~ Apiot~ <NA>   
## 3 Cryptococcus        Fungi   Basidiomycota Treme~ Treme~ Crypto~ Crypt~ <NA>   
## 4 Solicoccozyma       Fungi   Basidiomycota Treme~ Filob~ Piskur~ Solic~ <NA>   
## 5 Filobasidium        Fungi   Basidiomycota Treme~ Filob~ Filoba~ Filob~ <NA>   
## 6 Naganishia          Fungi   Basidiomycota Treme~ Filob~ Filoba~ Nagan~ <NA>
taxa_names(merged_ps)[1:25]<- paste("B", taxa_names(merged_ps)[1:25], sep = "_")
taxa_names(merged_ps)[26:50]<- paste("F", taxa_names(merged_ps)[26:50], sep = "_")

taxa_names(merged_ps)
##  [1] "B_Leucobacter"                               
##  [2] "B_Microbacterium"                            
##  [3] "B_Pseudomonas"                               
##  [4] "B_Acinetobacter"                             
##  [5] "B_Alkanindiges"                              
##  [6] "B_Aeromonas"                                 
##  [7] "B_Proteus"                                   
##  [8] "B_Rhodanobacter"                             
##  [9] "B_Burkholderia-Caballeronia-Paraburkholderia"
## [10] "B_Massilia"                                  
## [11] "B_Comamonas"                                 
## [12] "B_Taibaiella"                                
## [13] "B_Mucilaginibacter"                          
## [14] "B_Pedobacter"                                
## [15] "B_Alistipes"                                 
## [16] "B_Rikenella"                                 
## [17] "B_Odoribacter"                               
## [18] "B_Bacteroides"                               
## [19] "B_Chryseobacterium"                          
## [20] "B_Myroides"                                  
## [21] "B_Methylobacterium"                          
## [22] "B_Akkermansia"                               
## [23] "B_Acidothermus"                              
## [24] "B_Mycobacterium"                             
## [25] "B_Rhodococcus"                               
## [26] "F_Cyphellophora"                             
## [27] "F_Botrytis"                                  
## [28] "F_Acrodontium"                               
## [29] "F_Talaromyces"                               
## [30] "F_Penicillium"                               
## [31] "F_Cladosporium"                              
## [32] "F_Order_Mortierellales"                      
## [33] "F_Family_Mortierellaceae"                    
## [34] "F_Alternaria"                                
## [35] "F_Pleurotus"                                 
## [36] "F_Flammulina"                                
## [37] "F_Family_Thelephoraceae"                     
## [38] "F_Rhodotorula"                               
## [39] "F_Family_Pezizaceae"                         
## [40] "F_Moesziomyces"                              
## [41] "F_Basidiobolus"                              
## [42] "F_Cystobasidium"                             
## [43] "F_Saitoella"                                 
## [44] "F_Rozellomycota_gen_Incertae_sedis"          
## [45] "F_Cutaneotrichosporon"                       
## [46] "F_Apiotrichum"                               
## [47] "F_Cryptococcus"                              
## [48] "F_Solicoccozyma"                             
## [49] "F_Filobasidium"                              
## [50] "F_Naganishia"
merged_ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 50 taxa and 109 samples ]:
## sample_data() Sample Data:        [ 109 samples by 14 sample variables ]:
## tax_table()   Taxonomy Table:     [ 50 taxa by 7 taxonomic ranks ]:
## taxa are rows

4.1.0.2 Fit GLLVM model: Effect of soft release

merged_ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 50 taxa and 109 samples ]:
## sample_data() Sample Data:        [ 109 samples by 14 sample variables ]:
## tax_table()   Taxonomy Table:     [ 50 taxa by 7 taxonomic ranks ]:
## taxa are rows
############# data prep ###
############# data prep ###
############# data prep ###

ys <- data.frame(t(otu_table(merged_ps)))
names(ys) <-taxa_names(merged_ps)

ys<-rescale(as.matrix(ys),  to = c(-1,1))


Xs<-data.frame(sample_data(merged_ps))


Xs$location<-factor(Xs$location)
Xs$Status<-factor(Xs$Status, levels = c("Pre-release", "Post-release"))
table(Xs$TimePoint)
## 
## T1 (lab)       T2       T3 
##       64       29       16
str(Xs)
## 'data.frame':    109 obs. of  14 variables:
##  $ feature.id       : chr  "10-T0" "101-T2" "104-T3" "109-T2" ...
##  $ date             : Date, format: "2019-12-05" "2019-12-05" ...
##  $ frog_id          : chr  "10" "101" "104" "109" ...
##  $ treatment        : chr  "C0" "C2" "C3" "C2" ...
##  $ clutch           : chr  "B" "B" "C" "E" ...
##  $ mass             : num  2.58 2.58 2.13 2.13 2.76 3.16 1.88 2.54 2.28 2.41 ...
##  $ location         : Factor w/ 4 levels "E1","E2","E3",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ sampling_event   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TimePoint        : Factor w/ 3 levels "T1 (lab)","T2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sample_or_Control: chr  "True sample" "True sample" "True sample" "True sample" ...
##  $ is.neg           : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Status           : Factor w/ 2 levels "Pre-release",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ mass_cat         : chr  "High" "High" "Low" "Low" ...
##  $ Seq_depth        : num  14255 4663 38081 38198 78739 ...
Xs$frog_id<-factor(Xs$frog_id)

Xs$Wild<-ifelse(Xs$TimePoint == "T1 (lab)", "Lab", "Wild")
Xs$Wild<-factor(Xs$Wild)


table(Xs$Wild)
## 
##  Lab Wild 
##   64   45
# fit model

fit <- gllvm(ys, Xs, 
             num.lv = 2,
             formula = ~  Status, 
             row.eff = ~(1|frog_id),
             starting.val = "zero",
             family = "gaussian")

#plot(fit)
#summary(fit)
#AIC(fit)

coefplot(fit, cex.ylab = 0.7)

# null model

fit_null <- gllvm(ys,
                  num.lv = 2,
                  family = "gaussian")


############################ extract for ggploting ####
############################ extract for ggploting ####
############################ extract for ggploting ####

df<-coef(fit)

est_df<-data.frame(df$Intercept)

est_df2<-data.frame(df$Xcoef) # choose columns of interest

est_df3<-merge(est_df, est_df2, by = 0)

head(est_df3)
# order genera

row.names(est_df3)<-est_df3$Row.names
est_df3<-est_df3[colnames(ys),]

head(est_df3)
#put est_df3 into long format

names(est_df3)[1]<- "Genus"
names(est_df3)[2]<- "Intercept"

estimates_df <- gather(est_df3, Treatment, Estimate, names(est_df3)[2]:names(est_df3)[ncol(est_df3)], factor_key=TRUE)

head(estimates_df )
############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####


confint_df<-data.frame(confint(fit))
dim(confint_df)
## [1] 253   2
confint_df<-rbind(confint_df[rownames(confint_df) %like% "Xcoef", ],
                  confint_df[rownames(confint_df) %like% "Intercept", ])

head(confint_df)
# add a column with correct variable level
variables<- colnames(est_df3)[3:ncol(est_df3)]
variables<-c(variables, "Intercept")
variables1<-rep(variables, nrow(est_df))
variables2<-variables1[order(match(variables1, variables))]

confint_df$Treatment<-variables2


# column with taxa names. Should be automatically in the correct order but double check

confint_df$Genus<-rep(colnames(ys), length(unique(confint_df$Treatment)))



# now have estimates, confidence intervals, aas seperate data frames, but they are in different formats. Need to massage them into one dataframe for plotting.

merged<-merge(estimates_df, confint_df, by = c("Treatment", "Genus"))


final_estimates_reduced<- merged
names(final_estimates_reduced)[4]<-"CI_lower"
names(final_estimates_reduced)[5]<-"CI_upper"

#final_estimates<- merged[,c(1,2,3,7,8)]
head(final_estimates_reduced)
unique(final_estimates_reduced$Treatment)
## [1] Intercept          StatusPost.release
## Levels: Intercept StatusPost.release
## add significance

final_estimates_reduced$Sig<- !data.table::between(0, final_estimates_reduced$CI_lower, final_estimates_reduced$CI_upper)


final_estimates_reduced$Genus<-factor(final_estimates_reduced$Genus)

levels(final_estimates_reduced$Treatment)
## [1] "Intercept"          "StatusPost.release"
final_estimates_reduced$Treatment<-factor(final_estimates_reduced$Treatment)
levels(final_estimates_reduced$Treatment)<-c("Pre-release", "Post-release")

final_estimates_reduced2<-subset(final_estimates_reduced, Treatment == "Post-release")

final_estimates_reduced2$Genus2<-abbreviate(final_estimates_reduced2$Genus, minlength = 20 )

head(final_estimates_reduced2)
final_estimates_reduced2 <- final_estimates_reduced2  %>%
  mutate(Kingdom = ifelse(substr(Genus, 1, 1) == "B", "Bacteria", "Fungi"))

P1<-ggplot(subset(final_estimates_reduced2, Kingdom == "Bacteria"), aes(y = reorder(Genus2, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "cadetblue"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(1,0.2, 0.2, 0.6),"cm"))+
  theme( strip.background = element_blank(),
         strip.text.x = element_blank())+
  theme(axis.title.x = element_blank())

P2<-ggplot(subset(final_estimates_reduced2, Kingdom == "Fungi"), aes(y = reorder(Genus2, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  # facet_wrap(~Kingdom, nrow = 2, scales = "free_y")  + 
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "cadetblue"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(1,0.2, 0.2, 0.6),"cm"))+
  theme( strip.background = element_blank(),
         strip.text.x = element_blank())+
  xlab("Estimate \n <~Pre-release vs Post-release~>")

ggarrange(P1, P2, ncol = 1, labels = c("a) Bacteria", "b) Fungi"), heights = c(1, 1.15))

4.2 VIZ: Alpha diversity

4.2.0.1 Alpha diversity: effect of soft release

################ alpha diversity ##############


sample_data(V4_rare)$Observed<-estimate_richness(V4_rare, split = TRUE, measures = c("Observed"))$Observed
sample_data(ITS_rare)$Observed<-estimate_richness(ITS_rare, split = TRUE, measures = c("Observed"))$Observed

sample_data(V4_rare)$Shannon<-estimate_richness(V4_rare, split = TRUE, measures = c("Shannon"))$Shannon
sample_data(ITS_rare)$Shannon<-estimate_richness(ITS_rare, split = TRUE, measures = c("Shannon"))$Shannon


pal<-pals::stepped3()[c(1,5,9,13)]
pal[2]<-"violetred"


alpha1<-ggplot(sample_data(V4_rare), aes(y = Observed, x = TimePoint, col = TimePoint, fill = TimePoint))+
  geom_boxplot(alpha = 0.5, col = "black")+
  geom_jitter( pch = 21, size = 2, width = 0.1)+
  geom_line(aes(group = frog_id), alpha = 0.6, col = "grey")+
  theme_bw()+
  xlab("Time point")+
  ylab("Bacterial ASV diversity")+
  scale_color_manual(values = pal)+
  scale_fill_manual(values = pal)


alpha2<-ggplot(sample_data(ITS_rare), aes(y = Observed, x = TimePoint, col = TimePoint, fill = TimePoint))+
  geom_boxplot( alpha = 0.5, col = "black")+
  geom_jitter( pch = 21, size = 2, width = 0.1)+
  geom_line(aes(group = frog_id), alpha = 0.6, col = "grey")+
  theme_bw()+
  xlab("Time point")+
  ylab("Fungal ASV diversity")+
  scale_color_manual(values = pal)+
  scale_fill_manual(values = pal)


ggarrange(alpha1, alpha2, ncol = 2, common.legend = T)

shannon_16S<-ggplot(sample_data(V4_rare), aes(y = Shannon, x = TimePoint))+
  geom_boxplot(fill = "skyblue")+
  geom_point()+
  geom_line(aes(group = frog_id), alpha = 0.5)+
  theme_bw()+
  ylab("Shannon diversity")

shannon_ITS<-ggplot(sample_data(ITS_rare), aes(y = Shannon, x = TimePoint))+
  geom_boxplot(fill = "skyblue")+
  geom_point()+
  geom_line(aes(group = frog_id), alpha = 0.5)+
  theme_bw()+
  ylab("Shannon diversity")

ggarrange(shannon_16S, shannon_ITS)

### correlations ####
### correlations ####
### correlations ####
### correlations ####

metadata_16S<-data.frame(sample_data(V4_rare))
metadata_16S<-subset(metadata_16S, location != "Unknown"& location != "E2" & location != "E5")

metadata_ITS<-data.frame(sample_data(ITS_rare))
metadata_ITS<-subset(metadata_ITS, location != "Unknown"& location != "E2" & location != "E5")



names(metadata_16S)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
names(metadata_ITS)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "Sample_or_Control"
## [10] "is.neg"            "TimePoint"         "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
alpha_merged<-merge(metadata_16S[,c(1,3,7,9,14,15)], metadata_ITS[,c(1,14,15)], by = "feature.id")

head(alpha_merged)
alpha_merged$feature.id
##  [1] "10-T0"  "101-T2" "104-T3" "109-T2" "11-T1"  "113-T0" "115-T1" "116-T1"
##  [9] "118-T2" "119-T3" "12-T1"  "120-T3" "121-T0" "122-T0" "123-T1" "126-T2"
## [17] "128-T3" "129-T0" "13-T2"  "131-T1" "132-T1" "133-T2" "134-T2" "136-T3"
## [25] "15-T3"  "19-T1"  "21-T2"  "24-T3"  "25-T0"  "26-T0"  "27-T1"  "28-T1" 
## [33] "29-T2"  "30-T2"  "31-T3"  "32-T3"  "33-T0"  "34-T0"  "35-T1"  "40-T3" 
## [41] "41-T0"  "43-T1"  "47-T3"  "53-T2"  "55-T3"  "56-T3"  "58-T0"  "6-T2"  
## [49] "60-T1"  "62-T2"  "63-T3"  "65-T0"  "66-T0"  "67-T1"  "68-T1"  "69-T2" 
## [57] "71-T3"  "73-T0"  "77-T2"  "91-T1"  "92-T1"  "95-T3"  "96-T3"  "98-T0" 
## [65] "R1"     "R10"    "R2"     "R24"    "R25"    "R26"    "R27"    "R28"   
## [73] "R29"    "R3"     "R31"    "R32"    "R33"    "R34"    "R35"    "R36"   
## [81] "R37"    "R4"     "R5"     "R8"     "R9"     "RR10"   "RR11"   "RR12"  
## [89] "RR13"   "RR14"   "RR15"   "RR16"   "RR17"   "RR6"    "RR8"    "RR9"
names(alpha_merged)
## [1] "feature.id"  "frog_id"     "location"    "TimePoint"   "Seq_depth.x"
## [6] "Observed.x"  "Seq_depth.y" "Observed.y"
names(alpha_merged)[5]<-"Seq_depth_V4"
names(alpha_merged)[6]<-"Observed_V4"

names(alpha_merged)[7]<-"Seq_depth_ITS"
names(alpha_merged)[8]<-"Observed_ITS"

cor_plot<-ggplot(alpha_merged, aes(x = Observed_V4, y = Observed_ITS))+
  geom_line(aes(group = frog_id), col = "lightgrey")+
  geom_point( aes(fill = TimePoint), size = 3, pch = 21, col = "darkgrey")+
  scale_fill_brewer(palette = "Paired")+
  theme_bw()+
  xlab("Bacterial ASV diversity")+
  ylab("Fungal ASV diversity")+
  labs(fill = "Time point")+
  labs(color = "Time point")+
  geom_smooth(method = "lm",  aes(col = TimePoint), se = F)+
  scale_color_manual(values = pal)+
  scale_fill_manual(values = pal)


cor_plot

ggarrange(alpha1, alpha2,cor_plot, ncol = 3, common.legend = T, legend = "right", labels = c("a)", "b)", "c)"))

4.2.1 STATS: Alpha diversity stats (GLM)

## statistics

metadata_16S<-data.frame(sample_data(V4_rare))
names(metadata_16S)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
dim(metadata_16S)
## [1] 124  16
detach(package:plyr)

metadata_16S %>% group_by(Status) %>% summarize(mean = mean(Observed))
##############

center <-  function(x) {
  scaled_values <- x - mean(x)
  return(scaled_values)
}

metadata_16S$frog_id<-factor(metadata_16S$frog_id)
metadata_16S$location<-factor(metadata_16S$location)
metadata_16S$Observed_scaled<- as.numeric(center(metadata_16S$Observed))
metadata_16S$Seq_depth_scaled<- as.numeric(scale(metadata_16S$Seq_depth))


### fit models ##
### fit models ##
### fit models ##

m_alpha_16S<- glmmTMB(Observed ~ Status  + Seq_depth_scaled + (1|frog_id), data = metadata_16S)

m_alpha_16S2<- glmmTMB(Observed ~ TimePoint  +Seq_depth_scaled+ (1|frog_id), data = metadata_16S)

m_alpha_16S3<- glmmTMB(Observed ~ location  +Seq_depth_scaled+ (1|frog_id), data = metadata_16S)


sjPlot::tab_model(m_alpha_16S)
  Observed
Predictors Estimates CI p
(Intercept) 261.30 234.39 – 288.20 <0.001
Status [Pre-release] -190.55 -225.16 – -155.94 <0.001
Seq depth scaled 35.66 18.88 – 52.45 <0.001
Random Effects
σ2 8070.23
τ00 frog_id 0.00
N frog_id 84
Observations 124
Marginal R2 / Conditional R2 0.487 / NA
sjPlot::tab_model(m_alpha_16S2)
  Observed
Predictors Estimates CI p
(Intercept) 70.64 50.41 – 90.87 <0.001
TimePoint [T2] 203.46 164.05 – 242.87 <0.001
TimePoint [T3] 167.19 118.21 – 216.17 <0.001
Seq depth scaled 36.10 19.42 – 52.78 <0.001
Random Effects
σ2 7959.72
τ00 frog_id 0.00
N frog_id 84
Observations 124
Marginal R2 / Conditional R2 0.494 / NA
sjPlot::plot_model(m_alpha_16S)

sjPlot::plot_model(m_alpha_16S3)

summary(m_alpha_16S2)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ TimePoint + Seq_depth_scaled + (1 | frog_id)
## Data: metadata_16S
## 
##      AIC      BIC   logLik deviance df.resid 
##   1477.7   1494.6   -732.8   1465.7      118 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance  Std.Dev. 
##  frog_id  (Intercept) 6.763e-20 2.601e-10
##  Residual             7.960e+03 8.922e+01
## Number of obs: 124, groups:  frog_id, 84
## 
## Dispersion estimate for gaussian family (sigma^2): 7.96e+03 
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        70.639     10.321   6.844 7.68e-12 ***
## TimePointT2       203.458     20.106  10.119  < 2e-16 ***
## TimePointT3       167.192     24.989   6.691 2.22e-11 ***
## Seq_depth_scaled   36.100      8.512   4.241 2.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## shannon ##

shannon_v4<- glmmTMB(Shannon ~ Status  + Seq_depth_scaled + (1|frog_id), data = metadata_16S)

summary(shannon_v4)
##  Family: gaussian  ( identity )
## Formula:          Shannon ~ Status + Seq_depth_scaled + (1 | frog_id)
## Data: metadata_16S
## 
##      AIC      BIC   logLik deviance df.resid 
##    391.3    405.4   -190.6    381.3      119 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  frog_id  (Intercept) 0.1917   0.4378  
##  Residual             1.0867   1.0424  
## Number of obs: 124, groups:  frog_id, 84
## 
## Dispersion estimate for gaussian family (sigma^2): 1.09 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         4.8166     0.1795  26.840   <2e-16 ***
## StatusPre-release  -2.2281     0.2181 -10.216   <2e-16 ***
## Seq_depth_scaled    0.2047     0.1077   1.901   0.0573 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
########### ITS #######
########### ITS #######
########### ITS #######
########### ITS #######

metadata_ITS<-data.frame(sample_data(ITS_rare))

metadata_ITS %>% group_by(Status) %>% summarize(mean = mean(Observed))
metadata_ITS$frog_id<-factor(metadata_ITS$frog_id)
metadata_ITS$location<-factor(metadata_ITS$location)
metadata_ITS$Observed_scaled<- as.numeric(scale(metadata_ITS$Observed))
metadata_ITS$Seq_depth_scaled<- as.numeric(scale(metadata_ITS$Seq_depth))

###### fit models ###
###### fit models ###
###### fit models ###

m_alpha_ITS<- glmmTMB(Observed ~ Status  + Seq_depth_scaled+ (1|frog_id), data = metadata_ITS)

summary(m_alpha_ITS)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ Status + Seq_depth_scaled + (1 | frog_id)
## Data: metadata_ITS
## 
##      AIC      BIC   logLik deviance df.resid 
##   1155.9   1170.4   -572.9   1145.9      131 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  frog_id  (Intercept)   7.513   2.741  
##  Residual             259.673  16.114  
## Number of obs: 136, groups:  frog_id, 95
## 
## Dispersion estimate for gaussian family (sigma^2):  260 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         58.732      2.361  24.876   <2e-16 ***
## StatusPre-release  -47.864      2.920 -16.391   <2e-16 ***
## Seq_depth_scaled     2.493      1.407   1.772   0.0765 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::tab_model(m_alpha_ITS)
  Observed
Predictors Estimates CI p
(Intercept) 58.73 54.10 – 63.36 <0.001
Status [Pre-release] -47.86 -53.59 – -42.14 <0.001
Seq depth scaled 2.49 -0.27 – 5.25 0.076
Random Effects
σ2 259.67
τ00 frog_id 7.51
ICC 0.03
N frog_id 95
Observations 136
Marginal R2 / Conditional R2 0.667 / 0.677
m_alpha_ITS2<- glmmTMB(Observed ~ TimePoint  + Seq_depth_scaled+ (1|frog_id), data = metadata_ITS)

sjPlot::tab_model(m_alpha_ITS2)
  Observed
Predictors Estimates CI p
(Intercept) 10.87 7.44 – 14.30 <0.001
TimePoint [T2] 46.91 40.40 – 53.43 <0.001
TimePoint [T3] 49.84 41.16 – 58.52 <0.001
Seq depth scaled 2.55 -0.21 – 5.31 0.070
Random Effects
σ2 258.40
τ00 frog_id 8.11
ICC 0.03
N frog_id 95
Observations 136
Marginal R2 / Conditional R2 0.668 / 0.678
plot_model(m_alpha_ITS2)

metadata_ITS$Observed_predicted<- stats::predict(m_alpha_ITS2, new_data = new_data)

## shannon

shannon_ITS<- glmmTMB(Shannon ~ Status  + Seq_depth_scaled + (1|frog_id), data = metadata_ITS)

summary(shannon_ITS)
##  Family: gaussian  ( identity )
## Formula:          Shannon ~ Status + Seq_depth_scaled + (1 | frog_id)
## Data: metadata_ITS
## 
##      AIC      BIC   logLik deviance df.resid 
##    281.9    296.5   -136.0    271.9      131 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  frog_id  (Intercept) 0.07554  0.2749  
##  Residual             0.36170  0.6014  
## Number of obs: 136, groups:  frog_id, 95
## 
## Dispersion estimate for gaussian family (sigma^2): 0.362 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        2.775920   0.097960  28.337   <2e-16 ***
## StatusPre-release -1.433658   0.114938 -12.473   <2e-16 ***
## Seq_depth_scaled   0.003101   0.056703   0.055    0.956    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::tab_model(shannon_ITS)
  Shannon
Predictors Estimates CI p
(Intercept) 2.78 2.58 – 2.97 <0.001
Status [Pre-release] -1.43 -1.66 – -1.21 <0.001
Seq depth scaled 0.00 -0.11 – 0.11 0.956
Random Effects
σ2 0.36
τ00 frog_id 0.08
ICC 0.17
N frog_id 95
Observations 136
Marginal R2 / Conditional R2 0.522 / 0.604
sjPlot::tab_model(shannon_v4)
  Shannon
Predictors Estimates CI p
(Intercept) 4.82 4.46 – 5.17 <0.001
Status [Pre-release] -2.23 -2.66 – -1.80 <0.001
Seq depth scaled 0.20 -0.01 – 0.42 0.057
Random Effects
σ2 1.09
τ00 frog_id 0.19
ICC 0.15
N frog_id 84
Observations 124
Marginal R2 / Conditional R2 0.455 / 0.537
#### correlation ###
#### correlation ###
#### correlation ###
#### correlation ###


head(alpha_merged)
alpha_cor_model<- glmmTMB(Observed_V4 ~ Observed_ITS + (1|frog_id), data = alpha_merged)

sjPlot::tab_model(alpha_cor_model)
  Observed_V4
Predictors Estimates CI p
(Intercept) 40.40 22.95 – 57.84 <0.001
Observed ITS 3.58 3.16 – 4.01 <0.001
Random Effects
σ2 3036.13
τ00 frog_id 943.15
ICC 0.24
N frog_id 71
Observations 96
Marginal R2 / Conditional R2 0.735 / 0.798
summary(alpha_cor_model)
##  Family: gaussian  ( identity )
## Formula:          Observed_V4 ~ Observed_ITS + (1 | frog_id)
## Data: alpha_merged
## 
##      AIC      BIC   logLik deviance df.resid 
##   1074.4   1084.7   -533.2   1066.4       92 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  frog_id  (Intercept)  943.1   30.71   
##  Residual             3036.1   55.10   
## Number of obs: 96, groups:  frog_id, 71
## 
## Dispersion estimate for gaussian family (sigma^2): 3.04e+03 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   40.3951     8.9027   4.537 5.69e-06 ***
## Observed_ITS   3.5830     0.2174  16.479  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Beta diversity analysis (constrained ordination)

4.3.1 STATS: Beta diversity - 16S

#############

V4_rare_sub<- V4_rare

wunifrac_dist<-distance(V4_rare_sub, method = "wunifrac")


otutable<-data.frame(t(V4_rare_sub@otu_table@.Data))
metadata <- data.frame(sample_data(V4_rare_sub))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
Seq_depth <- as.numeric(scale(metadata$Seq_depth))
location <-metadata$location
time_point <-metadata$TimePoint
treatment <-metadata$treatment



final_model<-capscale(wunifrac_dist ~ 
                        time_point+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 
final_model$CCA$rank
## [1] 3
# anova

anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
##### enclosure ######
##### enclosure ######
##### enclosure ######



final_model2<-capscale(wunifrac_dist ~ 
                         location+
                         Seq_depth,
                       env = metadata, 
                       comm = otutable) 

# weighted unifrac

anova_wunifrac<-anova.cca(final_model2, by="terms")
anova_wunifrac

4.3.2 VIZ: Beta diversity - 16S

####### plot effect of time point #####

## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(V4_rare_sub))[,c("feature.id", "TimePoint", "frog_id")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########


centroids_df<-data.frame(final_model_df$centroids)
centroids_df
row.names(centroids_df)<-c("T1 (lab)", "T2", "T3" )


###########
###########
###########
###########

vectors_wunifrac1<-vectors_df
centroids_wunifrac1<-centroids_df

# colour palette
pal<-pals::stepped3()[c(1,5,9,13)]
pal[2]<-"violetred"


V4_timepoint<-ggplot(vectors_wunifrac1, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = TimePoint), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =TimePoint), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  
  theme_bw()+
  geom_line(aes(group = frog_id), alpha = 0.2)+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  # add arrows
  
  geom_segment(data=centroids_wunifrac1, aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_wunifrac1,  
                            alpha = 0.9, col = "black", size = 4, fill = "yellow", 
                            aes(CAP1, CAP2, label = row.names(centroids_wunifrac1)))+
  
  theme_light(base_size = 12)+
  ggtitle("a) Bacteria: Time point")+
  labs(fill = "Time point", col = "Time point")

V4_timepoint

####### plot effect of enclosure #####


## extract data from model

final_model_df<-scores(final_model2)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(V4_rare_sub))[,c("feature.id", "location", "frog_id")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########

centroids_df<-data.frame(final_model_df$centroids)
#centroids_df<-centroids_df[1:6,]
centroids_df
centroids_df<-centroids_df[1:4,]
row.names(centroids_df)<-c("E1","E2", "E3", "Lab" )


###########
###########
###########
###########

vectors_wunifrac2<-vectors_df
centroids_wunifrac2<-centroids_df



V4_location<-ggplot(vectors_wunifrac2, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = location), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =location), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  
  theme_bw()+
  geom_line(aes(group = frog_id), alpha = 0.2)+
  
  scale_fill_viridis(discrete = TRUE, direction = -1)+
  scale_color_viridis(discrete = TRUE, direction = -1)+
  
  # add arrows
  
  geom_segment(data=centroids_wunifrac2, aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_wunifrac2,  
                            alpha = 0.9, col = "black", size = 4, fill = "yellow", 
                            aes(CAP1, CAP2, label = row.names(centroids_wunifrac2)))+
  
  theme_light(base_size = 12)+
  ggtitle("c) Bacteria: Enclosure")+
  labs(fill = "Enclosure", col = "Enclosure")

V4_location

4.3.3 STATS: Beta diversity - ITS

# effect of time point 

ITS_rare_sub<- subset_samples(ITS_rare, location != "Unknown")


wunifrac_dist<-distance(ITS_rare_sub, method = "wunifrac")
otutable<-data.frame(t(ITS_rare_sub@otu_table@.Data))
metadata <- data.frame(sample_data(ITS_rare_sub))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "Sample_or_Control"
## [10] "is.neg"            "TimePoint"         "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
Seq_depth <- as.numeric(scale(metadata$Seq_depth))
location <-metadata$location
time_point <-metadata$TimePoint


final_model<-capscale(wunifrac_dist ~ 
                        time_point+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 


anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
# effect of enclosure 



final_model2<-capscale(wunifrac_dist ~ 
                         location+
                         Seq_depth,
                       env = metadata, 
                       comm = otutable) 


anova_wunifrac<-anova.cca(final_model2, by="terms")
anova_wunifrac

4.3.4 VIZ: Beta diversity - ITS

## plot for time point ##
## plot for time point ##

## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ITS_rare_sub))[,c("feature.id", "TimePoint", "frog_id")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########


centroids_df<-data.frame(final_model_df$centroids)
#centroids_df<-centroids_df[1:6,]
centroids_df
row.names(centroids_df)<-c("T1 (lab)", "T2", "T3" )


###########

vectors_wunifrac3<-vectors_df
centroids_wunifrac3<-centroids_df



ITS_timepoint<-ggplot(vectors_wunifrac3, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = TimePoint), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =TimePoint), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  geom_line(aes(group = frog_id), alpha = 0.2)+
  
  theme_bw()+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  # add arrows
  
  geom_segment(data=centroids_wunifrac3, aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_wunifrac3,  
                            alpha = 0.9, col = "black", size = 4, fill = "yellow", 
                            aes(CAP1, CAP2, label = row.names(centroids_wunifrac3)))+
  
  theme_light(base_size = 12)+
  ggtitle("b) Fungi: Time point")+
  labs(fill = "Time point", col = "Time point")

ITS_timepoint

# plot for enclosure
# plot for enclosure
# plot for enclosure


## extract data from model

final_model_df<-scores(final_model2)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ITS_rare_sub))[,c("feature.id", "location", "frog_id")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########

centroids_df<-data.frame(final_model_df$centroids)
centroids_df<-centroids_df[1:4,]
centroids_df
row.names(centroids_df)<-c("E1","E2", "E3",  "Lab" )


###########
###########
###########
###########

vectors_wunifrac4<-vectors_df
centroids_wunifrac4<-centroids_df



ITS_location<-ggplot(vectors_wunifrac4, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = location), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =location), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  geom_line(aes(group = frog_id), alpha = 0.2)+
  
  theme_bw()+
  scale_fill_viridis(discrete = TRUE, direction = -1)+
  scale_color_viridis(discrete = TRUE, direction = -1)+
  
  # add arrows
  
  geom_segment(data=centroids_wunifrac4, aes(x = 0, y = 0, xend = CAP1, yend = CAP2),
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1,  col = "black")+
  ggrepel::geom_label_repel(data=centroids_wunifrac4,  
                            alpha = 0.9, col = "black", size = 4, fill = "yellow", 
                            aes(CAP1, CAP2, label = row.names(centroids_wunifrac4)))+
  
  theme_light(base_size = 12)+
  ggtitle("d) Fungi: Enclosure")+
  labs(fill = "Enclosure", col = "Enclosure")



# plot together
beta1<-ggarrange(V4_timepoint, ITS_timepoint, common.legend = T, legend = "right")

beta2<-ggarrange(V4_location, ITS_location, common.legend = T, legend = "right")

ggarrange(beta1, beta2, ncol = 1)

4.4 STATS: Homogeneity of variance test

# bacteria

wunifrac_dist<-distance(V4_rare_sub, method = "wunifrac")

# Extract the group membership
groups <- sample_data(V4_rare_sub)$TimePoint

# Perform beta dispersion
beta_disp <- betadisper(wunifrac_dist, groups)

# View the results
print(beta_disp)
## 
##  Homogeneity of multivariate dispersions
## 
## Call: betadisper(d = wunifrac_dist, group = groups)
## 
## No. of Positive Eigenvalues: 77
## No. of Negative Eigenvalues: 38
## 
## Average distance to median:
## T1 (lab)       T2       T3 
##   0.2038   0.1384   0.1271 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 115 eigenvalues)
##  PCoA1  PCoA2  PCoA3  PCoA4  PCoA5  PCoA6  PCoA7  PCoA8 
## 1.9441 1.7033 0.9191 0.3016 0.1584 0.1377 0.1269 0.1066
plot(beta_disp)

# Perform an analysis of variance on the beta dispersion result
anova_beta_disp <- anova(beta_disp)
anova_beta_disp 
# fungi

wunifrac_dist<-distance(ITS_rare_sub, method = "wunifrac")

# Extract the group membership
groups <- sample_data(ITS_rare_sub)$TimePoint

# Perform beta dispersion
beta_disp <- betadisper(wunifrac_dist, groups)

# View the results
print(beta_disp)
## 
##  Homogeneity of multivariate dispersions
## 
## Call: betadisper(d = wunifrac_dist, group = groups)
## 
## No. of Positive Eigenvalues: 68
## No. of Negative Eigenvalues: 67
## 
## Average distance to median:
## T1 (lab)       T2       T3 
##   0.2609   0.1888   0.2828 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 135 eigenvalues)
##  PCoA1  PCoA2  PCoA3  PCoA4  PCoA5  PCoA6  PCoA7  PCoA8 
## 3.9640 2.4493 1.6857 0.9088 0.6947 0.6055 0.5616 0.4259
plot(beta_disp)

# Perform an analysis of variance on the beta dispersion result
anova_beta_disp <- anova(beta_disp)
anova_beta_disp 

4.5 Network analysis

4.5.0.1 Residual correlation plot

merged_ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 50 taxa and 109 samples ]:
## sample_data() Sample Data:        [ 109 samples by 14 sample variables ]:
## tax_table()   Taxonomy Table:     [ 50 taxa by 7 taxonomic ranks ]:
## taxa are rows
############# data prep ###
############# data prep ###
############# data prep ###

ys <- data.frame(t(otu_table(merged_ps)))
names(ys) <-taxa_names(merged_ps)

ys<-rescale(as.matrix(ys),  to = c(-1,1))

Xs<-data.frame(sample_data(merged_ps))

Xs$location<-factor(Xs$location)
Xs$Status<-factor(Xs$Status, levels = c("Pre-release", "Post-release"))
table(Xs$TimePoint)
## 
## T1 (lab)       T2       T3 
##       64       29       16
str(Xs)
## 'data.frame':    109 obs. of  14 variables:
##  $ feature.id       : chr  "10-T0" "101-T2" "104-T3" "109-T2" ...
##  $ date             : Date, format: "2019-12-05" "2019-12-05" ...
##  $ frog_id          : chr  "10" "101" "104" "109" ...
##  $ treatment        : chr  "C0" "C2" "C3" "C2" ...
##  $ clutch           : chr  "B" "B" "C" "E" ...
##  $ mass             : num  2.58 2.58 2.13 2.13 2.76 3.16 1.88 2.54 2.28 2.41 ...
##  $ location         : Factor w/ 4 levels "E1","E2","E3",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ sampling_event   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TimePoint        : Factor w/ 3 levels "T1 (lab)","T2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sample_or_Control: chr  "True sample" "True sample" "True sample" "True sample" ...
##  $ is.neg           : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Status           : Factor w/ 2 levels "Pre-release",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ mass_cat         : chr  "High" "High" "Low" "Low" ...
##  $ Seq_depth        : num  14255 4663 38081 38198 78739 ...
Xs$frog_id<-factor(Xs$frog_id)

Xs$Wild<-ifelse(Xs$TimePoint == "T1 (lab)", "Lab", "Wild")
Xs$Wild<-factor(Xs$Wild)


table(Xs$Wild)
## 
##  Lab Wild 
##   64   45
# fit model

fit <- gllvm(ys, Xs, 
             num.lv = 2,
             formula = ~  TimePoint+location, 
             starting.val = "zero",
             family = "gaussian")

coefplot(fit)

### residual correlation

cr1<-data.frame(getResidualCor(fit))#


names(cr1)<-names(data.frame(ys))
names(cr1)<-abbreviate(names(cr1), minlength = 15)

row.names(cr1)<-names(data.frame(ys))
rownames(cr1)<-abbreviate(rownames(cr1), minlength = 15 )

cr2<-cor_pmat(cr1)


# Regions coloured in dark blue in correlation plot indicate 
# clusters of species that are positively correlated with each other, 
#after controlling for (co)variation in species explained by 
#environmental terms

corplot1<-ggcorrplot(cr1, 
                     hc.order = F,
                     outline.col = "white",
                     type = "full",
                     ggtheme = ggplot2::theme_minimal,
                     tl.cex = 7.5,
                     p.mat = cr2,
                     sig.level = 0.01,
                     #show.diag = F,
                     insig = "blank",
                     #  colors = c("#6D9EC1", "white", "#E46726"))
                     colors = c("blue", "white", "red"))+
  theme(axis.text.x = element_text(angle = 90, vjust=0.5))+
  theme(plot.margin=unit(c(1,1,1,1),"cm"))+
  geom_vline(xintercept = 25.5)+
  geom_hline(yintercept = 25.5)
# annotate("rect", xmin = 21.5, xmax = 22.5, ymin = 0, ymax = 50.5,
#  alpha = .5,fill = NA, col = "black", linetype = "dashed")

corplot1

###### null model ###
###### null model ###
###### null model ###

fit_null <- gllvm(ys,
                  num.lv = 2,
                  starting.val = "zero",
                  family = "gaussian")


# amount of variation explained by TimePoint

rcov <- getResidualCov(fit, adjust = 0)
rcov0 <- getResidualCov(fit_null, adjust = 0)
rcov0$trace; rcov$trace
## [1] 6.81847
## [1] 2.17145
1 - rcov$trace / rcov0$trace
## [1] 0.6815341
#The ratio of traces suggests that environmental variables explain approximately 68% of the (co)variation in microbial species abundances.

cr1_null<-data.frame(getResidualCor(fit_null))#


names(cr1_null)<-names(data.frame(ys))
names(cr1_null)<-abbreviate(names(cr1_null), minlength = 15)

row.names(cr1_null)<-names(data.frame(ys))
rownames(cr1_null)<-abbreviate(rownames(cr1_null), minlength = 15 )

cr2_null<-cor_pmat(cr1_null)

#library(ggcorrplot)

# Regions coloured in dark blue in correlation plot indicate 
# clusters of species that are positively correlated with each other, 
#after controlling for (co)variation in species explained by 
#environmental terms

ggcorrplot(cr1_null, 
           hc.order = F,
           outline.col = "white",
           type = "full",
           ggtheme = ggplot2::theme_minimal,
           tl.cex = 7.5,
           p.mat = cr2_null,
           sig.level = 0.01,
           #show.diag = F,
           insig = "blank",
           #  colors = c("#6D9EC1", "white", "#E46726"))
           colors = c("blue", "white", "red"))+
  theme(axis.text.x = element_text(angle = 90, vjust=0.5))+
  theme(plot.margin=unit(c(1,1,1,1),"cm"))+
  geom_vline(xintercept = 25.5)+
  geom_hline(yintercept = 25.5)

4.5.0.2 Network figure

dim(cr1)
## [1] 50 50
corplot2<-ggcorrplot(cr1, 
                     hc.order = F,
                     outline.col = "white",
                     type = "lower",
                     ggtheme = ggplot2::theme_minimal,
                     tl.cex = 7.5,
                     p.mat = cr2,
                     sig.level = 0.01,
                     #show.diag = F,
                     insig = "blank",
                     # colors = c("#6D9EC1", "white", "#E46726"))
                     colors = c("blue", "white", "red"))+
  # theme(axis.text.x = element_text(angle = 90, vjust=0.5))+
  theme(plot.margin=unit(c(0.5,0.5,0.5,0.5),"cm"))+
  geom_vline(xintercept = 25.5)+
  geom_hline(yintercept = 25.5)



### generate network data

edge_data<-corplot2$data


vertex_data<- data.frame(unique(c(edge_data$Var1,edge_data$Var2)))
names(vertex_data)[1]<-"name"

vertex_data$Kingdom<- ifelse(grepl('F_', vertex_data$name), "Fungi", "Bacteria")

edge_data<-subset(edge_data, signif == 1)
edge_data<-subset(edge_data, value != 1)
edge_data<-edge_data[,c(1:3)]
names(edge_data)<-c("To", "From", "cor")

edge_data$weight2 <- abs(edge_data$cor)
edge_data$dir <- ifelse(edge_data$cor<0, "Neg", "Pos")
edge_data$weight<- as.numeric(rescale(edge_data$cor, c(0.01,0.2)))


g <- graph_from_data_frame(edge_data, directed = F, vertices = vertex_data)


n<-ggnetwork::fortify(g, layout = igraph::with_fr())

head(n)
n_full<-n


network<-ggplot(n_full, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges( aes(col = dir, alpha = weight2)) +
  theme_blank()+
  geom_nodes(aes(fill = Kingdom), pch = 21,  size = 5, col = "white")+
  scale_fill_manual(values = c("gold", "deepskyblue3"))+
  scale_color_manual(values = c("blue", "red"))+
  labs(col = "Direction")+
  theme(plot.margin=unit(c(2,1,1,1),"cm"))+
  guides(alpha = "none")+
  theme(legend.key.height = unit(0.005, "cm"))+
  theme(legend.position = c(0.9,0.98))+
  theme(legend.background = element_rect(fill="lightblue",
                                         linewidth=0.5, 
                                         linetype="solid", 
                                         colour ="lightblue"))

ggarrange(corplot1, network, ncol = 2, 
          labels = c("a)", "b)"), widths = c(1.5,1))

edge_data$weight<- as.numeric(rescale(edge_data$cor, c(0.01,0.05)))


network1<-ggplot(n_full, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges( aes(col = dir, alpha = weight2)) +
  theme_blank()+
  geom_nodes(aes(fill = Kingdom), pch = 21,  size = 5, col = "white")+
  scale_fill_manual(values = c("gold", "skyblue"))+
  scale_color_manual(values = c("blue", "red"))+
  labs(col = "Direction")+
  theme(plot.margin=unit(c(2,1,1,1),"cm"))+
  guides(alpha = "none")+
  theme(legend.key.height = unit(0.005, "cm"))+
  theme(legend.position = c(0.05,0.1))+
  theme(legend.background = element_rect(fill="lightblue",
                                         linewidth=0.5, 
                                         linetype="solid", 
                                         colour ="lightblue"))+
  geom_label(aes(label = name, fill = Kingdom), size = 2)


ggarrange(corplot1, network1, ncol = 2, 
          labels = c("a)", "b)"), widths = c(1.5,1))

5 Analysis: Effect of carotenoid treatment (Fig 7)

5.0.1 Beta diversity: lab samples

ps_lab_V4 <- subset_samples(V4_rare, TimePoint == "T1 (lab)")
ps_lab_ITS <- subset_samples(ITS_rare, TimePoint == "T1 (lab)")
####

wunifrac_dist<-distance(ps_lab_V4, method = "wunifrac")
otutable<-data.frame(t(ps_lab_V4@otu_table@.Data))
metadata <- data.frame(sample_data(ps_lab_V4))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
ggplot(metadata, aes(y = mass, x = clutch))+geom_boxplot()+geom_point()

Seq_depth <- as.numeric(scale(metadata$Seq_depth))
treatment <-metadata$treatment
clutch <- metadata$clutch
mass<-metadata$mass
summary(mass)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.650   2.175   2.390   2.400   2.605   3.310
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
final_model<-capscale(wunifrac_dist ~ 
                        
                        treatment+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 


anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
####### plot #####
####### plot #####
####### plot #####


## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ps_lab_V4))[,c("feature.id", "treatment")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########

centroids_df<-data.frame(final_model_df$centroids)
#centroids_df<-centroids_df[1:6,]
centroids_df
###########
###########
###########
###########

vectors_wunifrac1<-vectors_df
centroids_wunifrac1<-centroids_df

# colour palette
pal<-pals::stepped3()[c(1,5,9,13)]
#pal<-pals::tol()[c(1,3,4,12)]


p1<- ggplot(vectors_wunifrac1, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = treatment, col = treatment), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =treatment), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  theme_bw()+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  theme_light(base_size = 12)+
  labs(fill = "Carot. treatment", col = "Carot. treatment")


####### ITS ##########
####### ITS ##########
####### ITS ##########
####### ITS ##########


####

wunifrac_dist<-distance(ps_lab_ITS, method = "wunifrac")
otutable<-data.frame(t(ps_lab_ITS@otu_table@.Data))
metadata <- data.frame(sample_data(ps_lab_ITS))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "Sample_or_Control"
## [10] "is.neg"            "TimePoint"         "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
Seq_depth <- as.numeric(scale(metadata$Seq_depth))
treatment <-metadata$treatment


final_model<-capscale(wunifrac_dist ~ 
                        treatment+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 



anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ps_lab_V4))[,c("feature.id", "treatment")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########


centroids_df<-data.frame(final_model_df$centroids)
centroids_df
###########
###########
###########
###########

vectors_wunifrac2<-vectors_df
centroids_wunifrac2<-centroids_df

pal<-pals::stepped3()[c(1,5,9,13)]


p2<- ggplot(vectors_wunifrac2, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = treatment, col = treatment), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =treatment), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  
  theme_bw()+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  theme_light(base_size = 12)+
  labs(fill = "Carot. treatment", col = "Carot. treatment")



ggarrange(p1, p2, ncol = 2, common.legend = T, legend = "right", labels = c("a) Bacteria", "b) Fungi"))

5.0.2 Alpha diversity: lab samples

sample_data(ps_lab_V4)$Observed<-estimate_richness(ps_lab_V4, split = TRUE, measures = c("Observed"))$Observed

sample_data(ps_lab_ITS)$Observed<-estimate_richness(ps_lab_ITS, split = TRUE, measures = c("Observed"))$Observed


p3<-ggplot(sample_data(ps_lab_V4), aes(y = Observed, x = treatment))+
  geom_boxplot(fill = "skyblue")+
  geom_jitter(width = 0.2)+
  xlab("Treatment")+
  ylab("Bacterial ASV div.")+
  theme_light(base_size = 12)

p4<- ggplot(sample_data(ps_lab_ITS), aes(y = Observed, x = treatment))+
  geom_boxplot(fill = "skyblue")+
  geom_jitter(width = 0.2)+
  xlab("Treatment")+
  ylab("Fungal ASV div.")+
  theme_light(base_size = 12)


ggarrange(p3, p4, p1, p2, ncol = 2, nrow = 2, common.legend = T, labels = c("a)", "b)", "c)", "d)"), legend = "bottom")

### statistics

metadata_lab_V4<-data.frame(sample_data(ps_lab_V4))

head(metadata_lab_V4)
model_16s_lab<- glmmTMB::glmmTMB(Observed~treatment + Seq_depth, data = metadata_lab_V4)
summary(model_16s_lab)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ treatment + Seq_depth
## Data: metadata_lab_V4
## 
##      AIC      BIC   logLik deviance df.resid 
##    838.9    853.1   -413.5    826.9       72 
## 
## 
## Dispersion estimate for gaussian family (sigma^2): 2.35e+03 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 3.969e+01  1.311e+01   3.028  0.00246 ** 
## treatmentC1 8.669e+00  1.559e+01   0.556  0.57819    
## treatmentC2 6.374e+00  1.606e+01   0.397  0.69145    
## treatmentC3 1.289e+01  1.545e+01   0.834  0.40416    
## Seq_depth   7.623e-04  1.335e-04   5.709 1.14e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metadata_lab_ITS<-data.frame(sample_data(ps_lab_ITS))

model_ITS_lab<- glmmTMB::glmmTMB(Observed~treatment + Seq_depth, data = metadata_lab_ITS)
summary(model_ITS_lab)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ treatment + Seq_depth
## Data: metadata_lab_ITS
## 
##      AIC      BIC   logLik deviance df.resid 
##    601.5    616.3   -294.7    589.5       81 
## 
## 
## Dispersion estimate for gaussian family (sigma^2): 51.3 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 9.355e+00  1.485e+00   6.298 3.02e-10 ***
## treatmentC1 2.050e+00  2.091e+00   0.981    0.327    
## treatmentC2 1.594e+00  2.167e+00   0.736    0.462    
## treatmentC3 5.520e-01  2.205e+00   0.250    0.802    
## Seq_depth   1.059e-05  7.863e-06   1.347    0.178    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.0.3 Beta diversity: Rewilded samples

ps_wild_V4 <- subset_samples(V4_rare, TimePoint != "T1 (wild)")
ps_wild_ITS <- subset_samples(ITS_rare, TimePoint != "T1 (wild)")

####

wunifrac_dist<-distance(ps_wild_V4, method = "wunifrac")
otutable<-data.frame(t(ps_wild_V4@otu_table@.Data))
metadata <- data.frame(sample_data(ps_wild_V4))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "TimePoint"        
## [10] "Sample_or_Control" "is.neg"            "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
Seq_depth <- as.numeric(scale(metadata$Seq_depth))
treatment <-metadata$treatment
time_point<-metadata$TimePoint



final_model<-capscale(wunifrac_dist ~ 
                        treatment+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 


# weighted unifrac

anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
####### plot #####
####### plot #####
####### plot #####


## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ps_wild_V4))[,c("feature.id", "treatment")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########


centroids_df<-data.frame(final_model_df$centroids)
centroids_df
vectors_wunifrac1<-vectors_df
centroids_wunifrac1<-centroids_df

pal<-pals::stepped3()[c(1,5,9,13)]


p1<- ggplot(vectors_wunifrac1, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = treatment, col = treatment), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =treatment), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  
  theme_bw()+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  theme_light(base_size = 12)+
  labs(fill = "Carot. treatment", col = "Carot. treatment")


####### ITS ##########
####### ITS ##########
####### ITS ##########
####### ITS ##########


####

wunifrac_dist<-distance(ps_wild_ITS, method = "wunifrac")
otutable<-data.frame(t(ps_wild_ITS@otu_table@.Data))
metadata <- data.frame(sample_data(ps_wild_ITS))
names(metadata)
##  [1] "feature.id"        "date"              "frog_id"          
##  [4] "treatment"         "clutch"            "mass"             
##  [7] "location"          "sampling_event"    "Sample_or_Control"
## [10] "is.neg"            "TimePoint"         "Status"           
## [13] "mass_cat"          "Seq_depth"         "Observed"         
## [16] "Shannon"
Seq_depth <- as.numeric(scale(metadata$Seq_depth))
treatment <-metadata$treatment
time_point<-metadata$TimePoint




final_model<-capscale(wunifrac_dist ~ 
                        treatment+
                        Seq_depth,
                      env = metadata, 
                      comm = otutable) 



anova_wunifrac<-anova.cca(final_model, by="terms")
anova_wunifrac
## extract data from model

final_model_df<-scores(final_model)

# extract CAP scores

vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)


# merge with info on dominant family

sample_metadata<-data.frame(sample_data(ps_wild_V4))[,c("feature.id", "treatment")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")


#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########

centroids_df<-data.frame(final_model_df$centroids)
centroids_df
vectors_wunifrac2<-vectors_df
centroids_wunifrac2<-centroids_df

# colour palette
pal<-pals::stepped3()[c(1,5,9,13)]


p2<- ggplot(vectors_wunifrac2, aes(x = CAP1, y = CAP2))+
  
  stat_ellipse(geom = "polygon", aes(fill = treatment, col = treatment), level = 0.9, alpha = 0.3, size = 0.5)+
  geom_point(aes(fill =treatment), pch = 21, size = 3, alpha = 1, stroke = 1, col = "black")+
  
  theme_bw()+
  scale_fill_manual(values = pal)+
  scale_color_manual(values = pal)+
  
  theme_light(base_size = 12)+
  labs(fill = "Carot. treatment", col = "Carot. treatment")



ggarrange(p1, p2, ncol = 2, common.legend = T, legend = "right", labels = c("a) Bacteria", "b) Fungi"))

5.0.4 Alpha diversity: Rewilded samples

##### alpha diversity ###########
##### alpha diversity ###########
##### alpha diversity ###########
##### alpha diversity ###########

sample_data(ps_wild_V4)$Observed<-estimate_richness(ps_wild_V4, split = TRUE, measures = c("Observed"))$Observed

sample_data(ps_wild_ITS)$Observed<-estimate_richness(ps_wild_ITS, split = TRUE, measures = c("Observed"))$Observed


p3<-ggplot(sample_data(ps_wild_V4), aes(y = Observed, x = treatment))+
  geom_boxplot(fill = "skyblue")+
  geom_jitter(width = 0.2)+
  xlab("Treatment")+
  ylab("Bacterial ASV div.")+
  theme_light(base_size = 12)

p4<- ggplot(sample_data(ps_wild_ITS), aes(y = Observed, x = treatment))+
  geom_boxplot(fill = "skyblue")+
  geom_jitter(width = 0.2)+
  xlab("Treatment")+
  ylab("Fungal ASV div.")+
  theme_light(base_size = 12)

ggarrange(p3, p4, p1, p2, ncol = 2, nrow = 2, common.legend = T, labels = c("e)", "f)", "g)", "h)"), legend = "bottom")

### statistics

metadata_wild_V4<-data.frame(sample_data(ps_wild_V4))

head(metadata_wild_V4)
hist(metadata_wild_V4$Observed)

model_16s_wild<- glmmTMB::glmmTMB(Observed~treatment + Seq_depth, data = metadata_wild_V4)
summary(model_16s_wild)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ treatment + Seq_depth
## Data: metadata_wild_V4
## 
##      AIC      BIC   logLik deviance df.resid 
##   1559.1   1576.0   -773.6   1547.1      118 
## 
## 
## Dispersion estimate for gaussian family (sigma^2): 1.53e+04 
## 
## Conditional model:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.263e+02  2.600e+01   4.858 1.19e-06 ***
## treatmentC1  3.427e+01  3.179e+01   1.078    0.281    
## treatmentC2  1.473e+01  3.123e+01   0.472    0.637    
## treatmentC3 -1.294e+01  3.238e+01  -0.400    0.689    
## Seq_depth    1.637e-04  3.149e-04   0.520    0.603    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data.frame(broom.mixed::tidy(model_16s_wild))
metadata_wild_ITS<-data.frame(sample_data(ps_wild_ITS))

model_ITS_wild<- glmmTMB::glmmTMB(Observed~treatment + Seq_depth, data = metadata_wild_ITS)
summary(model_ITS_wild)
##  Family: gaussian  ( identity )
## Formula:          Observed ~ treatment + Seq_depth
## Data: metadata_wild_ITS
## 
##      AIC      BIC   logLik deviance df.resid 
##   1304.6   1322.0   -646.3   1292.6      130 
## 
## 
## Dispersion estimate for gaussian family (sigma^2):  785 
## 
## Conditional model:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 2.372e+01  4.754e+00   4.990 6.05e-07 ***
## treatmentC1 5.488e+00  6.703e+00   0.819    0.413    
## treatmentC2 7.774e+00  6.609e+00   1.176    0.240    
## treatmentC3 3.926e-01  7.071e+00   0.056    0.956    
## Seq_depth   1.722e-05  2.745e-05   0.627    0.530    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Table S1

m1<- data.frame(broom.mixed::tidy(model_16s_lab))
m2<- data.frame(broom.mixed::tidy(model_ITS_lab))
m3<- data.frame(broom.mixed::tidy(model_16s_wild))
m4<- data.frame(broom.mixed::tidy(model_ITS_wild))

tableS1<-rbind(m1, m2, m3, m4)
write.table(tableS1, "TableS1.csv")

5.1 GLLVM: The effect of beta-carotene of common genera

We tested the effect of beta carotene on lab and wild samples separately. To do this, we first need CKR transform data and then merge bacterial and fungal data for 1) lab samples and 2) rewilded samples.

5.1.1 Data prep: Merge bacterial and fungal data for laboratory samples

V4_samples<-sample_names(frog_16S)
ITS_samples<-sample_names(frog_ITS)
samples_to_keep<-intersect(V4_samples,ITS_samples)

#frog_ITS<- frog_ITS %>% speedyseq::orient_taxa(as = "rows")


frog_16S_sub<-prune_samples(samples_to_keep, frog_16S)
frog_ITS_sub<-prune_samples(samples_to_keep, frog_ITS)

# glom by genus and get list of most common taxa per dataset

V4_genus<- tax_glom(frog_16S_sub, taxrank = "Genus")
ITS_genus<- tax_glom(frog_ITS_sub, taxrank = "Genus")

### filter

V4_genus<- subset_samples(V4_genus, Status == "Pre-release")
ITS_genus<- subset_samples(ITS_genus, Status == "Pre-release")


#########
#########


V4_top<-microbiome::aggregate_top_taxa(V4_genus, top = 42, level = "Genus")
ITS_top<-microbiome::aggregate_top_taxa(ITS_genus, top = 41, level = "Genus")

remove<-c("Other", "uncultured", "Kingdom_Fungi")

V4_to_keep<-taxa_names(V4_top)[!taxa_names(V4_top)%in% remove]
ITS_to_keep<-taxa_names(ITS_top)[!taxa_names(ITS_top)%in% remove]



#### CLR transform ###
#### CLR transform ###
#### CLR transform ###
#### CLR transform ###


V4_clr <-microbiome::transform(V4_genus, transform = "clr")
ITS_clr <-microbiome::transform(ITS_genus, transform = "clr")

## filter taxa

V4_clr<-subset_taxa(V4_clr, Genus %in% V4_to_keep)
ITS_clr<-subset_taxa(ITS_clr, Genus %in% ITS_to_keep)

## change names

taxa_names(V4_clr)<- data.frame(tax_table(V4_clr))$Genus
taxa_names(ITS_clr)<- data.frame(tax_table(ITS_clr))$Genus


### make merged otu table 

V4_otu<- data.frame(otu_table(V4_clr))
ITS_otu<- data.frame(otu_table(ITS_clr))

V4_otu[1:5,1:5]
ITS_otu[1:5,1:5]
names(V4_otu)<-sample_names(V4_clr)
names(ITS_otu)<-sample_names(ITS_clr)

full_otu<-rbind(V4_otu, ITS_otu)
full_otu<-otu_table(full_otu, taxa_are_rows = T)

# make merged tax table

V4_tax<- data.frame(tax_table(V4_clr))
ITS_tax<- data.frame(tax_table(ITS_clr))

full_tax<-rbind(V4_tax, ITS_tax)
full_tax2<-tax_table(full_tax)
taxa_names(full_tax2)<- row.names(full_tax)
colnames(full_tax2) <- c("Kingdom", "Phylum", "Class", 
                         "Order", "Family", "Genus", "Species")


### make merged metadata 

metadataV4<-data.frame(sample_data(V4_clr))
metadata<-sample_data(metadataV4)

## merge

merged_ps<-phyloseq(metadata, full_otu, full_tax2)
head(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                  Kingdom  Phylum         Class    Order   Family  Genus  Species
##                  <chr>    <chr>          <chr>    <chr>   <chr>   <chr>  <chr>  
## 1 Leucobacter    Bacteria Actinobacteria Actinob~ Microc~ Microb~ Leuco~ <NA>   
## 2 Microbacterium Bacteria Actinobacteria Actinob~ Microc~ Microb~ Micro~ <NA>   
## 3 Renibacterium  Bacteria Actinobacteria Actinob~ Microc~ Microc~ Renib~ <NA>   
## 4 Arthrobacter   Bacteria Actinobacteria Actinob~ Microc~ Microc~ Arthr~ <NA>   
## 5 Pseudomonas    Bacteria Proteobacteria Gammapr~ Pseudo~ Pseudo~ Pseud~ <NA>   
## 6 Acinetobacter  Bacteria Proteobacteria Gammapr~ Pseudo~ Moraxe~ Acine~ <NA>
tail(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                Kingdom Phylum   Class     Order     Family     Genus     Species
##                <chr>   <chr>    <chr>     <chr>     <chr>      <chr>     <chr>  
## 1 Rozellomyco~ Fungi   Rozello~ Rozellom~ Rozellom~ Rozellomy~ Rozellom~ <NA>   
## 2 Vishniacozy~ Fungi   Basidio~ Tremello~ Tremella~ Bulleriba~ Vishniac~ <NA>   
## 3 Cutaneotric~ Fungi   Basidio~ Tremello~ Trichosp~ Trichospo~ Cutaneot~ <NA>   
## 4 Apiotrichum  Fungi   Basidio~ Tremello~ Trichosp~ Trichospo~ Apiotric~ <NA>   
## 5 Papiliotrema Fungi   Basidio~ Tremello~ Tremella~ Rhynchoga~ Papiliot~ <NA>   
## 6 Naganishia   Fungi   Basidio~ Tremello~ Filobasi~ Filobasid~ Naganish~ <NA>
taxa_names(merged_ps)[1:40]<- paste("B", taxa_names(merged_ps)[1:40], sep = "_")
taxa_names(merged_ps)[41:80]<- paste("F", taxa_names(merged_ps)[41:80], sep = "_")

merged_lab_ps<-merged_ps

merged_lab_ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 80 taxa and 64 samples ]:
## sample_data() Sample Data:        [ 64 samples by 14 sample variables ]:
## tax_table()   Taxonomy Table:     [ 80 taxa by 7 taxonomic ranks ]:
## taxa are rows

5.1.2 Data prep: Merge bacterial and fungal data for rewilded samples

V4_samples<-sample_names(frog_16S)
ITS_samples<-sample_names(frog_ITS)
samples_to_keep<-intersect(V4_samples,ITS_samples)


frog_16S_sub<-prune_samples(samples_to_keep, frog_16S)
frog_ITS_sub<-prune_samples(samples_to_keep, frog_ITS)

# glom by genus and get list of most common taxa per dataset

V4_genus<- tax_glom(frog_16S_sub, taxrank = "Genus")
ITS_genus<- tax_glom(frog_ITS_sub, taxrank = "Genus")

### filter

V4_genus<- subset_samples(V4_genus, Status == "Post-release")
ITS_genus<- subset_samples(ITS_genus, Status == "Post-release")


#########
#########
#########
#########

V4_top<-microbiome::aggregate_top_taxa(V4_genus, top = 46, level = "Genus")
ITS_top<-microbiome::aggregate_top_taxa(ITS_genus, top = 41, level = "Genus")

remove<-c("Other", "uncultured", "Kingdom_Fungi", "uncultured bacterium")

V4_to_keep<-taxa_names(V4_top)[!taxa_names(V4_top)%in% remove]
ITS_to_keep<-taxa_names(ITS_top)[!taxa_names(ITS_top)%in% remove]

#### CLR transform ###
#### CLR transform ###
#### CLR transform ###
#### CLR transform ###


V4_clr <-microbiome::transform(V4_genus, transform = "clr")
ITS_clr <-microbiome::transform(ITS_genus, transform = "clr")

## filter taxa

V4_clr<-subset_taxa(V4_clr, Genus %in% V4_to_keep)
ITS_clr<-subset_taxa(ITS_clr, Genus %in% ITS_to_keep)

## change names

taxa_names(V4_clr)<- data.frame(tax_table(V4_clr))$Genus
taxa_names(ITS_clr)<- data.frame(tax_table(ITS_clr))$Genus


### make merged otu table 

V4_otu<- data.frame(otu_table(V4_clr))
ITS_otu<- data.frame(otu_table(ITS_clr))

V4_otu[1:5,1:5]
ITS_otu[1:5,1:5]
names(V4_otu)<-sample_names(V4_clr)
names(ITS_otu)<-sample_names(ITS_clr)

full_otu<-rbind(V4_otu, ITS_otu)
full_otu<-otu_table(full_otu, taxa_are_rows = T)

# make merged tax table

V4_tax<- data.frame(tax_table(V4_clr))
ITS_tax<- data.frame(tax_table(ITS_clr))

full_tax<-rbind(V4_tax, ITS_tax)
full_tax2<-tax_table(full_tax)
taxa_names(full_tax2)<- row.names(full_tax)
colnames(full_tax2) <- c("Kingdom", "Phylum", "Class", 
                         "Order", "Family", "Genus", "Species")


### make merged metadata 

metadataV4<-data.frame(sample_data(V4_clr))
metadata<-sample_data(metadataV4)

## merge

merged_ps<-phyloseq(metadata, full_otu, full_tax2)
head(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                    Kingdom  Phylum         Class   Order  Family  Genus  Species
##                    <chr>    <chr>          <chr>   <chr>  <chr>   <chr>  <chr>  
## 1 Curtobacterium   Bacteria Actinobacteria Actino~ Micro~ Microb~ Curto~ <NA>   
## 2 Amnibacterium    Bacteria Actinobacteria Actino~ Micro~ Microb~ Amnib~ <NA>   
## 3 Leifsonia        Bacteria Actinobacteria Actino~ Micro~ Microb~ Leifs~ <NA>   
## 4 Arthrobacter     Bacteria Actinobacteria Actino~ Micro~ Microc~ Arthr~ <NA>   
## 5 Paenarthrobacter Bacteria Actinobacteria Actino~ Micro~ Microc~ Paena~ <NA>   
## 6 Pseudonocardia   Bacteria Actinobacteria Actino~ Pseud~ Pseudo~ Pseud~ <NA>
tail(tax_table(merged_ps))
## Taxonomy Table:     [ 6 taxa by 7 taxonomic ranks ]:
##                 Kingdom Phylum        Class           Order Family Genus Species
##                 <chr>   <chr>         <chr>           <chr> <chr>  <chr> <chr>  
## 1 Papiliotrema  Fungi   Basidiomycota Tremellomycetes Trem~ Rhync~ Papi~ <NA>   
## 2 Saitozyma     Fungi   Basidiomycota Tremellomycetes Trem~ Trimo~ Sait~ <NA>   
## 3 Cryptococcus  Fungi   Basidiomycota Tremellomycetes Trem~ Crypt~ Cryp~ <NA>   
## 4 Solicoccozyma Fungi   Basidiomycota Tremellomycetes Filo~ Pisku~ Soli~ <NA>   
## 5 Filobasidium  Fungi   Basidiomycota Tremellomycetes Filo~ Filob~ Filo~ <NA>   
## 6 Naganishia    Fungi   Basidiomycota Tremellomycetes Filo~ Filob~ Naga~ <NA>
taxa_names(merged_ps)[1:40]<- paste("B", taxa_names(merged_ps)[1:40], sep = "_")
taxa_names(merged_ps)[41:80]<- paste("F", taxa_names(merged_ps)[41:80], sep = "_")

merged_wild_ps<-merged_ps

merged_wild_ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 80 taxa and 45 samples ]:
## sample_data() Sample Data:        [ 45 samples by 14 sample variables ]:
## tax_table()   Taxonomy Table:     [ 80 taxa by 7 taxonomic ranks ]:
## taxa are rows
# shared taxa

table(taxa_names(merged_wild_ps) %in% taxa_names(merged_lab_ps))
## 
## FALSE  TRUE 
##    61    19
taxa_names(merged_wild_ps)[taxa_names(merged_wild_ps) %in% taxa_names(merged_lab_ps)]
##  [1] "B_Arthrobacter"                              
##  [2] "B_Pseudomonas"                               
##  [3] "B_Burkholderia-Caballeronia-Paraburkholderia"
##  [4] "B_Massilia"                                  
##  [5] "B_Odoribacter"                               
##  [6] "B_Bacteroides"                               
##  [7] "B_Akkermansia"                               
##  [8] "B_Mycobacterium"                             
##  [9] "F_Talaromyces"                               
## [10] "F_Penicillium"                               
## [11] "F_Cladosporium"                              
## [12] "F_Sarocladium"                               
## [13] "F_Rhodotorula"                               
## [14] "F_Leucosporidium"                            
## [15] "F_Basidiobolus"                              
## [16] "F_Saitoella"                                 
## [17] "F_Vishniacozyma"                             
## [18] "F_Papiliotrema"                              
## [19] "F_Naganishia"

5.1.3 Fit GLLVM: Lab samples

ys <- data.frame(t(otu_table(merged_lab_ps)))
names(ys) <-taxa_names(merged_lab_ps)

ys<-rescale(as.matrix(ys),  to = c(-1,1))

Xs<-data.frame(sample_data(merged_lab_ps))


Xs$location<-factor(Xs$location)
table(Xs$TimePoint)
## 
## T1 (lab) 
##       64
str(Xs)
## 'data.frame':    64 obs. of  14 variables:
##  $ feature.id       : chr  "10-T0" "101-T2" "104-T3" "109-T2" ...
##  $ date             : Date, format: "2019-12-05" "2019-12-05" ...
##  $ frog_id          : chr  "10" "101" "104" "109" ...
##  $ treatment        : chr  "C0" "C2" "C3" "C2" ...
##  $ clutch           : chr  "B" "B" "C" "E" ...
##  $ mass             : num  2.58 2.58 2.13 2.13 2.76 3.16 1.88 2.54 2.28 2.41 ...
##  $ location         : Factor w/ 1 level "Lab": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sampling_event   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TimePoint        : Factor w/ 1 level "T1 (lab)": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sample_or_Control: chr  "True sample" "True sample" "True sample" "True sample" ...
##  $ is.neg           : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Status           : chr  "Pre-release" "Pre-release" "Pre-release" "Pre-release" ...
##  $ mass_cat         : chr  "High" "High" "Low" "Low" ...
##  $ Seq_depth        : num  14255 4663 38081 38198 78739 ...
Xs$frog_id<-factor(Xs$frog_id)
Xs$clutch<-factor(Xs$clutch)
Xs$mass_cat<-factor(Xs$mass_cat)
# fit model


fit_lab <- gllvm(ys, Xs, 
                 num.lv = 2,
                 formula = ~  treatment,
                 # row.eff = ~(1|clutch),
                 starting.val = "zero",
                 family = "gaussian")

coefplot(fit_lab, cex.ylab = 0.7)

5.1.3.1 Extract estimates and plot

df<-coef(fit_lab)

est_df<-data.frame(df$Intercept)

est_df2<-data.frame(df$Xcoef) # choose columns of interest

est_df3<-merge(est_df, est_df2, by = 0)

head(est_df3)
# order genera

row.names(est_df3)<-est_df3$Row.names
est_df3<-est_df3[colnames(ys),]

#put est_df3 into long format

names(est_df3)[1]<- "Genus"
names(est_df3)[2]<- "Intercept"

estimates_df <- gather(est_df3, Treatment, Estimate, names(est_df3)[2]:names(est_df3)[ncol(est_df3)], factor_key=TRUE)

############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####


confint_df<-data.frame(confint(fit_lab))


confint_df<-rbind(confint_df[rownames(confint_df) %like% "Xcoef", ],
                  confint_df[rownames(confint_df) %like% "Intercept", ])

head(confint_df)
# add a column with correct variable level
variables<- colnames(est_df3)[3:ncol(est_df3)]
variables<-c(variables, "Intercept")
variables1<-rep(variables, nrow(est_df))
variables2<-variables1[order(match(variables1, variables))]

#confint_df$Treatment<-c(rep("UU", 40), rep("CU", 40), rep("UC", 40), rep("CC", 40))
confint_df$Treatment<-variables2


# column with taxa names. Should be automatically in the correct order but double check

confint_df$Genus<-rep(colnames(ys), length(unique(confint_df$Treatment)))



# now have estimates, confidence intervals, aas seperate data frames, but they are in different formats. Need to massage them into one dataframe for plotting.

merged<-merge(estimates_df, confint_df, by = c("Treatment", "Genus"))


final_estimates_reduced<- merged
names(final_estimates_reduced)[4]<-"CI_lower"
names(final_estimates_reduced)[5]<-"CI_upper"

#final_estimates<- merged[,c(1,2,3,7,8)]
head(final_estimates_reduced)
unique(final_estimates_reduced$Treatment)
## [1] Intercept   treatmentC1 treatmentC2 treatmentC3
## Levels: Intercept treatmentC1 treatmentC2 treatmentC3
final_estimates_reduced2<-final_estimates_reduced

## add significance

final_estimates_reduced2$Sig<- !data.table::between(0, final_estimates_reduced2$CI_lower, final_estimates_reduced2$CI_upper)


final_estimates_reduced2$Genus<-factor(final_estimates_reduced2$Genus)

levels(final_estimates_reduced2$Treatment)
## [1] "Intercept"   "treatmentC1" "treatmentC2" "treatmentC3"
final_estimates_reduced2$Treatment<-factor(final_estimates_reduced2$Treatment)
levels(final_estimates_reduced2$Treatment)<-c("C0", "C1", "C2", "C3")



ggplot(subset(final_estimates_reduced2, Treatment != "C0"), aes(y = reorder(Genus, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  facet_wrap(~Treatment, nrow = 1, scales = "free_x")  + 
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

ggplot(subset(final_estimates_reduced2, Treatment != "C0"), aes(y = Estimate, x = Treatment, col = Sig))+
  geom_point()+
  facet_wrap(~Genus, scales = "free_y")  + 
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper, col = Sig), width = 0, size = 0.5)+
  geom_hline(yintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

## plot only significant

to_keep<- as.character(unique(subset(final_estimates_reduced2, Treatment != "C0" & Sig == T)$Genus))

estimates_sig<- subset(final_estimates_reduced2,  Genus %in% to_keep)



estimates_sig<-estimates_sig%>%arrange(Treatment, Genus)


estimates_sig$Genus<-factor(estimates_sig$Genus)


estimates_sig_lab<-estimates_sig

P5<-ggplot(subset(estimates_sig_lab, Treatment != "C0"), aes(y = reorder(Genus, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  facet_wrap(~Treatment, nrow = 1)  + 
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 12))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

ggplot(subset(estimates_sig_lab, Genus != "B_Arthrobacter"), aes(y = Estimate, x = Treatment, col = Sig))+
  geom_point()+
  facet_wrap(~Genus)  + 
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper, col = Sig), width = 0, size = 0.5)+
  geom_hline(yintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 10))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

5.1.4 Fit GLLVM: Rewilded samples

ys <- data.frame(t(otu_table(merged_wild_ps)))
names(ys) <-taxa_names(merged_wild_ps)

ys<-rescale(as.matrix(ys),  to = c(-1,1))


Xs<-data.frame(sample_data(merged_wild_ps))


Xs$location<-factor(Xs$location)
table(Xs$TimePoint)
## 
## T2 T3 
## 29 16
str(Xs)
## 'data.frame':    45 obs. of  14 variables:
##  $ feature.id       : chr  "R1" "R10" "R13" "R14" ...
##  $ date             : Date, format: "2020-02-12" "2020-02-12" ...
##  $ frog_id          : chr  "101" "10" "108" "113" ...
##  $ treatment        : chr  "C2" "C0" "C1" "C0" ...
##  $ clutch           : chr  "B" "B" "H" "B" ...
##  $ mass             : num  2.07 1.94 1.64 2.14 1.67 2.19 2.01 1.59 2.32 1.9 ...
##  $ location         : Factor w/ 3 levels "E1","E2","E3": 3 3 2 2 2 2 2 2 3 2 ...
##  $ sampling_event   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ TimePoint        : Factor w/ 2 levels "T2","T3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sample_or_Control: chr  "True sample" "True sample" "True sample" "True sample" ...
##  $ is.neg           : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Status           : chr  "Post-release" "Post-release" "Post-release" "Post-release" ...
##  $ mass_cat         : chr  "High" "High" "Low" "High" ...
##  $ Seq_depth        : num  2648 44015 10933 10529 7383 ...
Xs$frog_id<-factor(Xs$frog_id)

# fit model


fit_wild <- gllvm(ys, Xs, 
                  num.lv = 2,
                  formula = ~  treatment+TimePoint, 
                  starting.val = "zero",
                  family = "gaussian")

coefplot(fit_wild, cex.ylab = 0.7)

5.1.4.1 Extract estimates and plot

df<-coef(fit_wild)

est_df<-data.frame(df$Intercept)

est_df2<-data.frame(df$Xcoef) # choose columns of interest

est_df3<-merge(est_df, est_df2, by = 0)

head(est_df3)
# order genera

row.names(est_df3)<-est_df3$Row.names
est_df3<-est_df3[colnames(ys),]

#put est_df3 into long format

names(est_df3)[1]<- "Genus"
names(est_df3)[2]<- "Intercept"

estimates_df <- gather(est_df3, Treatment, Estimate, names(est_df3)[2]:names(est_df3)[ncol(est_df3)], factor_key=TRUE)

############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####
############# extract confindence intervals  ####


confint_df<-data.frame(confint(fit_wild))


confint_df<-rbind(confint_df[rownames(confint_df) %like% "Xcoef", ],
                  confint_df[rownames(confint_df) %like% "Intercept", ])

head(confint_df)
# add a column with correct variable level
variables<- colnames(est_df3)[3:ncol(est_df3)]
variables<-c(variables, "Intercept")
variables1<-rep(variables, nrow(est_df))
variables2<-variables1[order(match(variables1, variables))]

#confint_df$Treatment<-c(rep("UU", 40), rep("CU", 40), rep("UC", 40), rep("CC", 40))
confint_df$Treatment<-variables2


# column with taxa names. Should be automatically in the correct order but double check

confint_df$Genus<-rep(colnames(ys), length(unique(confint_df$Treatment)))



# now have estimates, confidence intervals, aas seperate data frames, but they are in different formats. Need to massage them into one dataframe for plotting.

merged<-merge(estimates_df, confint_df, by = c("Treatment", "Genus"))


final_estimates_reduced<- merged
names(final_estimates_reduced)[4]<-"CI_lower"
names(final_estimates_reduced)[5]<-"CI_upper"

#final_estimates<- merged[,c(1,2,3,7,8)]
head(final_estimates_reduced)
unique(final_estimates_reduced$Treatment)
## [1] Intercept   TimePointT3 treatmentC1 treatmentC2 treatmentC3
## Levels: Intercept treatmentC1 treatmentC2 treatmentC3 TimePointT3
final_estimates_reduced2<-final_estimates_reduced

## add significance

final_estimates_reduced2$Sig<- !data.table::between(0, final_estimates_reduced2$CI_lower, final_estimates_reduced2$CI_upper)


final_estimates_reduced2$Genus<-factor(final_estimates_reduced2$Genus)

levels(final_estimates_reduced2$Treatment)
## [1] "Intercept"   "treatmentC1" "treatmentC2" "treatmentC3" "TimePointT3"
final_estimates_reduced2$Treatment<-factor(final_estimates_reduced2$Treatment)
levels(final_estimates_reduced2$Treatment)<-c("C0", "C1", "C2", "C3", "T3")

final_estimates_reduced2<-subset(final_estimates_reduced2, Treatment != "T3" )

ggplot(subset(final_estimates_reduced2, Treatment != "C0" ), aes(y = reorder(Genus, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  facet_wrap(~Treatment, nrow = 1, scales = "free_x")  + 
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

ggplot(subset(final_estimates_reduced2, Treatment != "C0" & Treatment != "T3"), aes(y = Estimate, x = Treatment, col = Sig))+
  geom_point()+
  facet_wrap(~Genus, scales = "free_y")  + 
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper, col = Sig), width = 0, size = 0.5)+
  geom_hline(yintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

## plot only significant

to_keep<- as.character(unique(subset(final_estimates_reduced2, Treatment != "C0" & Sig == T )$Genus))

estimates_sig<- subset(final_estimates_reduced2,  Genus %in% to_keep)



estimates_sig<-estimates_sig%>%arrange(Treatment, Genus)


estimates_sig$Genus<-factor(estimates_sig$Genus)


estimates_sig_wild<-estimates_sig

P6<-ggplot(subset(estimates_sig_wild, Treatment != "C0"), aes(y = reorder(Genus, Estimate), x = Estimate, col = Sig))+
  geom_point()+
  facet_wrap(~Treatment, nrow = 1)  + 
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper, col = Sig), height = 0, size = 0.5)+
  geom_vline(xintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 12))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

ggplot(subset(estimates_sig_wild, Treatment != "UU"), aes(y = Estimate, x = Treatment, col = Sig))+
  geom_point()+
  facet_wrap(~Genus)  + 
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper, col = Sig), width = 0, size = 0.5)+
  geom_hline(yintercept = 0, linetype = "longdash")+
  theme_bw()+
  scale_color_manual(values = c("grey", "red"))+
  theme(axis.title.y = element_blank())+
  theme(axis.text.y = element_text(face="bold", size = 8))+
  theme(legend.position = "none")+
  theme(plot.margin=unit(c(0.2,0.2, 0.2, 0.6),"cm"))

5.2 Figure 7

ggarrange(p3, p4, p1, p2, P5, P6, ncol = 2, nrow = 3, common.legend = T, legend = "bottom")